In [37]:
import emission.storage.decorations.location_queries as lq

In [38]:
reload(lq)


Out[38]:
<module 'emission.storage.decorations.location_queries' from '/Users/shankari/e-mission/e-mission-server/emission/storage/decorations/location_queries.pyc'>

In [39]:
%matplotlib inline

In [40]:
lq.get_uuid_list()


Out[40]:
[UUID('0763de67-f61e-3f5d-90e7-518e69793954'),
 UUID('f955cff1-8fb8-3b42-8776-6d8874e5b90a'),
 UUID('b0d937d0-70ef-305e-9563-440369012b39')]

In [41]:
import datetime as pydt

In [42]:
import pytz

In [43]:
get_jul_dt = lambda date: pydt.datetime(2015,7,date,tzinfo=pytz.timezone("America/Los_Angeles"))

In [44]:
get_aug_dt = lambda date: pydt.datetime(2015,8,date,tzinfo=pytz.timezone("America/Los_Angeles"))

In [45]:
import emission.analysis.plotting.leaflet_osm.our_plotter as lo

In [46]:
reload(lo)


Out[46]:
<module 'emission.analysis.plotting.leaflet_osm.our_plotter' from '/Users/shankari/e-mission/e-mission-server/emission/analysis/plotting/leaflet_osm/our_plotter.pyc'>

In [47]:
df = lq.get_plottable_df(lq.get_uuid_list()[0], "time", get_jul_dt(29), get_jul_dt(30))


final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1438156800000.0}}, {'data.mTime': {'$lt': 1438243200000.0}}], 'user_id': UUID('0763de67-f61e-3f5d-90e7-518e69793954'), 'metadata.filter': 'time'} 

In [48]:
df.shape


Out[48]:
(205, 28)

In [49]:
df.mAccuracy.hist(bins=20)


Out[49]:
<matplotlib.axes.AxesSubplot at 0x109ddf9d0>

In [50]:
df.mAccuracy.quantile(0.97)


Out[50]:
140.5904799999999

In [51]:
import numpy as np

In [52]:
np.count_nonzero(df.mAccuracy > 100)


Out[52]:
12

In [53]:
np.count_nonzero(df.mAccuracy > 200)


Out[53]:
6

In [54]:
df[df.mAccuracy > 200]


Out[54]:
filter key mAccuracy mAltitude mBearing mDistance mElapsedRealtimeNanos mHasAccuracy mHasAltitude mHasBearing ... mLongitude mProvider mResults mSpeed mTime platform read_ts type write_ts formatted_time
60 time background/location 453.212 0 0 0 338915173000000 True False False ... -122.076251 fused [0, 0] 0 1438190005779 android 0 message 1438190006628 2015-07-29 10:13:25
65 time background/location 417.357 0 0 0 339063466000000 True False False ... -122.094375 fused [0, 0] 0 1438190154072 android 0 message 1438190157622 2015-07-29 10:15:54
75 time background/location 428.671 0 0 0 339363946000000 True False False ... -122.136062 fused [0, 0] 0 1438190454552 android 0 message 1438190457814 2015-07-29 10:20:54
155 time background/location 460.203 0 0 0 346412304000000 True False False ... -122.141467 fused [0, 0] 0 1438197502909 android 0 message 1438197504303 2015-07-29 12:18:22
159 time background/location 434.455 0 0 0 346532418000000 True False False ... -122.113038 fused [0, 0] 0 1438197623024 android 0 message 1438197624346 2015-07-29 12:20:23
164 time background/location 419.824 0 0 0 346682535000000 True False False ... -122.101123 fused [0, 0] 0 1438197773109 android 0 message 1438197774429 2015-07-29 12:22:53

6 rows × 28 columns


In [55]:
tidx = df[df.mAccuracy > 200].index; tidx


Out[55]:
Int64Index([60, 65, 75, 155, 159, 164], dtype='int64')

In [56]:
df.loc[tidx]


Out[56]:
filter key mAccuracy mAltitude mBearing mDistance mElapsedRealtimeNanos mHasAccuracy mHasAltitude mHasBearing ... mLongitude mProvider mResults mSpeed mTime platform read_ts type write_ts formatted_time
60 time background/location 453.212 0 0 0 338915173000000 True False False ... -122.076251 fused [0, 0] 0 1438190005779 android 0 message 1438190006628 2015-07-29 10:13:25
65 time background/location 417.357 0 0 0 339063466000000 True False False ... -122.094375 fused [0, 0] 0 1438190154072 android 0 message 1438190157622 2015-07-29 10:15:54
75 time background/location 428.671 0 0 0 339363946000000 True False False ... -122.136062 fused [0, 0] 0 1438190454552 android 0 message 1438190457814 2015-07-29 10:20:54
155 time background/location 460.203 0 0 0 346412304000000 True False False ... -122.141467 fused [0, 0] 0 1438197502909 android 0 message 1438197504303 2015-07-29 12:18:22
159 time background/location 434.455 0 0 0 346532418000000 True False False ... -122.113038 fused [0, 0] 0 1438197623024 android 0 message 1438197624346 2015-07-29 12:20:23
164 time background/location 419.824 0 0 0 346682535000000 True False False ... -122.101123 fused [0, 0] 0 1438197773109 android 0 message 1438197774429 2015-07-29 12:22:53

6 rows × 28 columns


In [57]:
max(tidx)


Out[57]:
164

In [58]:
min(tidx)


Out[58]:
60

In [59]:
def get_map_list(user_id, loc_filter, start_dt, end_dt):
    df = lq.get_plottable_df(user_id, loc_filter, start_dt, end_dt)
    sp = lq.get_potential_split_index(df)
    print "original split indices are %s" % sp
    print df.formatted_time.iloc[sp]
    return lo.get_map_list(df, sp)

In [60]:
def find_outlier_threshold(df_with_speeds):
    quartile_vals = df_with_speeds.quantile([0.25, 0.75]).speed
    print("quartile values are %s" % quartile_vals)
    iqr = quartile_vals.iloc[1] - quartile_vals.iloc[0]
    print("iqr %s" % iqr)
    return quartile_vals.iloc[1] + 3 * iqr

In [61]:
def find_outlier_threshold_simple(with_speeds_df):
    return with_speeds_df.speed.quantile(0.99)

In [62]:
def find_areas_of_interest(df):
    # Calculate speeds
    point_list = [AttrDict(row) for row in df.to_dict('records')]
    zipped_points_list = zip(point_list, point_list[1:])
    speeds = [calSpeed(p1, p2) for (p1, p2) in zipped_points_list]
    speeds.insert(0, 0)
    with_speeds_df = pd.concat([df, pd.Series(speeds, name="speed")], axis=1)
    # Calculate speed outliers using the InterQuartile Range https://en.wikipedia.org/wiki/Outlier
    # speedThreshold = find_outlier_threshold(with_speeds_df)
    speedThreshold = find_outlier_threshold_simple(with_speeds_df)
    with_speeds_df.speed.hist(bins=20)
    print("speedThreshold = %d" % speedThreshold)
    # Find points with speeds above 95%
    candidateIndices = np.nonzero(with_speeds_df.speed > speedThreshold)[0]
    print("Found %d potential outliers, list = %s" % (len(candidateIndices), candidateIndices))
    if len(candidateIndices == 1):
        candidateClusterCenters = [candidateIndices]
        print("Only one candidate, cluster centers are %s" % candidateClusterCenters)
    else:
        from sklearn.cluster import AffinityPropagation
        af = AffinityPropagation().fit([[i] for i in candidateIndices])
        candidateClusterCenters = af.cluster_centers_
        print("Found %d clusters with centers %s" % (len(candidateClusterCenters), candidateClusterCenters))
    dfList = []
    for cc in candidateClusterCenters:
        print("Considering candidate cluster center %s" % cc)
        lowRange = max(cc[0]-5,0)
        highRange = min(cc[0]+5,with_speeds_df.shape[0])
        print("lowRange = max(%s, %s) = %s and highRange = max(%s, %s) = %s" % (cc[0]-5,0,lowRange,cc[0]+5,with_speeds_df.shape[0],highRange))
        dfList.append(with_speeds_df.loc[lowRange:highRange])
    return dfList

In [63]:
def filter_ransac(df):
    from sklearn import linear_model
    import numpy as np
    latArr = [[lat] for lat in df.mLatitude.as_matrix()]
    lngArr = df.mLongitude.as_matrix()
    model_ransac = linear_model.RANSACRegressor(linear_model.LinearRegression())
    model_ransac.fit(latArr, lngArr)
    inlier_mask = model_ransac.inlier_mask_
    print "Deleted %d points through ransac filtering" % np.count_nonzero(np.logical_not(inlier_mask))
    return inlier_mask

In [64]:
def get_filtered_map_list(user_id, loc_filter, start_dt, end_dt):
    df = lq.get_plottable_df(user_id, loc_filter, start_dt, end_dt)
    accuracy_filtered_df = pd.DataFrame(df[df.mAccuracy < 200].to_dict('records'))
    print "filtering points %s" % df[df.mAccuracy > 200].index
    print "filtered list size went from %s to %s" % (df.shape, accuracy_filtered_df.shape)
    ransac_mask = pd.Series([True] * accuracy_filtered_df.shape[0])
    areas_of_interest = find_areas_of_interest(accuracy_filtered_df)
    for area in areas_of_interest:
        print ("Area size = %s, index = %s with size %s" % (area.shape[0], area.index, len(area.index)))
        retain_mask = filter_ransac(area)
        print ("Retain mask is of size %d" % len(retain_mask))
        ransac_mask[area.index] = retain_mask
    print ("Accuracy filtered df shape is %s, ransac_mask size = %s" % (accuracy_filtered_df.shape, len(ransac_mask)))
    filtered_df = accuracy_filtered_df[ransac_mask]
    sp = lq.get_potential_split_index(filtered_df)
    print "filtered split indices are %s" % sp
    print df.formatted_time.loc[sp]
    return lo.get_map_list(filtered_df, sp)

In [65]:
def get_filter_compare(user_id, loc_filter, start_dt, end_dt):
    unfiltered_maps = get_map_list(user_id, loc_filter, start_dt, end_dt)
    filtered_maps = get_filtered_map_list(user_id, loc_filter, start_dt, end_dt)
    return zip(unfiltered_maps, filtered_maps)

In [66]:
import emission.core.common as ec
def calSpeed(point1, point2):
    distanceDelta = ec.calDistance([point1.mLongitude, point1.mLatitude], [point2.mLongitude, point2.mLatitude])
    timeDelta = point2.mTime - point1.mTime
    # print "Distance delta = %s and time delta = %s" % (distanceDelta, timeDelta)
    # assert(timeDelta != 0)
    if (timeDelta == 0):
        return 0
    return distanceDelta/(float(timeDelta)/1000)

In [67]:
tom_dist_filter_df_23 = lq.get_plottable_df(lq.get_uuid_list()[2], "distance", get_jul_dt(23), get_jul_dt(24))


final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1437638400000.0}}, {'data.mTime': {'$lt': 1437724800000.0}}], 'user_id': UUID('b0d937d0-70ef-305e-9563-440369012b39'), 'metadata.filter': 'distance'} 

In [68]:
import pandas as pd

In [69]:
filtered_tom_dist_filter_df_23 = pd.DataFrame(tom_dist_filter_df_23[tom_dist_filter_df_23.mAccuracy < 200].to_dict('records'))

In [70]:
filtered_tom_dist_filter_df_23.head()


Out[70]:
filter formatted_time key mAccuracy mAltitude mBearing mDistance mElapsedRealtimeNanos mHasAccuracy mHasAltitude ... mLon2 mLongitude mProvider mResults mSpeed mTime platform read_ts type write_ts
0 distance 2015-07-23 01:14:37 background/location 37.5 0.0 0 0 825072415000000 True False ... 0 -122.086260 fused [0, 0] 0.00 1437639277550 android 0 message 1437639277881
1 distance 2015-07-23 08:10:04 background/location 33.0 0.0 0 0 850002169000000 True False ... 0 -122.086260 fused [0, 0] 0.00 1437664204594 android 0 message 1437664209567
2 distance 2015-07-23 08:16:09 background/location 37.5 0.0 0 0 850367419000000 True False ... 0 -122.085437 fused [0, 0] 0.00 1437664569844 android 0 message 1437664570122
3 distance 2015-07-23 08:16:50 background/location 11.0 -5.8 0 0 850407758499656 True True ... 0 -122.085006 fused [0, 0] 0.00 1437664610773 android 0 message 1437664610912
4 distance 2015-07-23 08:17:06 background/location 4.0 -12.8 114 0 850423754226802 True True ... 0 -122.083913 fused [0, 0] 4.75 1437664626203 android 0 message 1437664626600

5 rows × 28 columns


In [71]:
filtered_tom_dist_filter_df_23.shape


Out[71]:
(205, 28)

In [72]:
from attrdict import AttrDict

In [73]:
point_list = [AttrDict(row) for row in filtered_tom_dist_filter_df_23.to_dict('records')]

In [74]:
zipped_points_list = zip(point_list, point_list[1:])

In [75]:
speeds = [calSpeed(p1, p2) for (p1, p2) in zipped_points_list]

In [76]:
len(speeds)


Out[76]:
204

In [77]:
speeds.insert(0, 0)

In [78]:
len(speeds)


Out[78]:
205

In [79]:
import pandas as pd

In [80]:
with_speeds_df = pd.concat([filtered_tom_dist_filter_df_23, pd.Series(speeds, name="speed")], axis=1)

In [81]:
with_speeds_df.speed.hist(bins=20)


Out[81]:
<matplotlib.axes.AxesSubplot at 0x10a104fd0>

In [82]:
with_speeds_df[with_speeds_df.speed > 100].index


Out[82]:
Int64Index([68, 69, 70], dtype='int64')

In [83]:
with_speeds_df.quantile([0.75,0.9,0.95,0.99]).speed


Out[83]:
0.75     20.066489
0.90     26.790328
0.95     30.764879
0.99    143.609764
Name: speed, dtype: float64

Box plot/inter quartile range based outlier detection


In [84]:
quartile_vals = with_speeds_df.quantile([0.25, 0.75]).speed

In [85]:
iqr = quartile_vals.iloc[1] - quartile_vals.iloc[0]

In [86]:
minor_outlier = quartile_vals.iloc[1] + 1.5 * iqr; minor_outlier


Out[86]:
45.929592976315917

In [87]:
major_outlier = quartile_vals.iloc[1] + 3 * iqr; major_outlier


Out[87]:
71.792697370328142

In [88]:
np.nonzero(with_speeds_df.speed > 144)


Out[88]:
(array([68, 69, 70]),)

In [89]:
[[pt] for pt in np.nonzero(with_speeds_df.speed > 144)[0]]


Out[89]:
[[68], [69], [70]]

In [90]:
from sklearn.cluster import AffinityPropagation
af = AffinityPropagation().fit(np.array([[68],[69],[70],[100],[101],[102]]))
(af.cluster_centers_, af.labels_, af.get_params)


Out[90]:
(array([[ 69],
        [101]]),
 array([0, 0, 0, 1, 1, 1]),
 <bound method AffinityPropagation.get_params of AffinityPropagation(affinity='euclidean', convergence_iter=15, copy=True,
          damping=0.5, max_iter=200, preference=None, verbose=False)>)

In [91]:
for cc in af.cluster_centers_:
    print cc


[69]
[101]

In [92]:
area_of_interest = with_speeds_df[65:75][["mLatitude", "mLongitude", "speed", "mTime", "formatted_time"]]

In [93]:
area_of_interest


Out[93]:
mLatitude mLongitude speed mTime formatted_time
65 37.710586 -122.466058 6.079706 1437669142163 2015-07-23 09:32:22
66 37.711364 -122.453417 17.466772 1437669206020 2015-07-23 09:33:26
67 37.714813 -122.449480 15.055191 1437669240343 2015-07-23 09:34:00
68 37.600321 -122.386934 147.878263 1437669334140 2015-07-23 09:35:34
69 37.727916 -122.443265 500.713177 1437669364156 2015-07-23 09:36:04
70 37.600319 -122.386934 235.091036 1437669428087 2015-07-23 09:37:08
71 37.600321 -122.386934 0.000399 1437669992128 2015-07-23 09:46:32
72 37.807162 -122.301494 38.963991 1437670613132 2015-07-23 09:56:53
73 37.805007 -122.295444 18.749856 1437670644228 2015-07-23 09:57:24
74 37.804614 -122.294252 3.653723 1437670675275 2015-07-23 09:57:55

In [94]:
calSpeed(area_of_interest.loc[67], area_of_interest.loc[68])


Out[94]:
147.87826256081783

In [95]:
calSpeed(area_of_interest.loc[68], area_of_interest.loc[69])


Out[95]:
500.71317660657405

In [96]:
calSpeed(area_of_interest.loc[69], area_of_interest.loc[70])


Out[96]:
235.0910364959406

In [97]:
calSpeed(area_of_interest.loc[70], area_of_interest.loc[71])


Out[97]:
0.00039919882482724163

In [98]:
calSpeed(area_of_interest.loc[71], area_of_interest.loc[72])


Out[98]:
38.963990634576

In [99]:
import folium

In [100]:
m = folium.Map([area_of_interest.mLatitude.mean(), area_of_interest.mLongitude.mean()])

In [101]:
m.div_markers(area_of_interest[["mLatitude", "mLongitude"]].as_matrix().tolist(), lo.df_to_string_list(area_of_interest))
m.line(area_of_interest[["mLatitude", "mLongitude"]].as_matrix().tolist())

In [102]:
import emission.analysis.plotting.leaflet_osm.ipython_helper as ipy

In [103]:
reload(ipy)


Out[103]:
<module 'emission.analysis.plotting.leaflet_osm.ipython_helper' from '/Users/shankari/e-mission/e-mission-server/emission/analysis/plotting/leaflet_osm/ipython_helper.pyc'>

In [104]:
ipy.inline_map(m)


Out[104]:

In [105]:
def smooth_boundary(point_df, maxSpeed = 33):
    prev_pt = None
    removed_indices = []
    for (i, pt) in enumerate(point_df[["mLatitude", "mLongitude", "mTime", "speed"]].to_dict('records')):
        pt = AttrDict(dict(pt))
        if prev_pt is None:
            # Don't have enough data yet, so don't make any decisions
            prev_pt = pt
        else:
            currSpeed = calSpeed(prev_pt, pt)
            print("while considering point %s(%s), prev_pt (%s) speed = %s" % (pt, i, prev_pt, currSpeed))
            # Should make this configurable
            if currSpeed > maxSpeed:
                print("currSpeed > 50, removing index %s " % (i))
                removed_indices.append(i)
            else:
                print("currSpeed < 50, retaining index %s " % (i))
                prev_pt = pt
    retained_indices = point_df.index.delete(removed_indices)
    return (retained_indices, removed_indices)

In [106]:
boundary_retained = smooth_boundary(area_of_interest, maxSpeed = 33)


while considering point AttrDict({'speed': 17.466771902852305, u'mLongitude': -122.45341689999999, u'mLatitude': 37.711364099999997, u'mTime': 1437669206020.0})(1), prev_pt (AttrDict({'speed': 6.0797060443101714, u'mLongitude': -122.46605820000001, u'mLatitude': 37.710586300000003, u'mTime': 1437669142163.0})) speed = 17.4667719029
currSpeed < 50, retaining index 1 
while considering point AttrDict({'speed': 15.055191127656343, u'mLongitude': -122.4494797, u'mLatitude': 37.714812999999999, u'mTime': 1437669240343.0})(2), prev_pt (AttrDict({'speed': 17.466771902852305, u'mLongitude': -122.45341689999999, u'mLatitude': 37.711364099999997, u'mTime': 1437669206020.0})) speed = 15.0551911277
currSpeed < 50, retaining index 2 
while considering point AttrDict({'speed': 147.87826256081783, u'mLongitude': -122.3869339, u'mLatitude': 37.600321000000001, u'mTime': 1437669334140.0})(3), prev_pt (AttrDict({'speed': 15.055191127656343, u'mLongitude': -122.4494797, u'mLatitude': 37.714812999999999, u'mTime': 1437669240343.0})) speed = 147.878262561
currSpeed > 50, removing index 3 
while considering point AttrDict({'speed': 500.71317660657405, u'mLongitude': -122.44326460000001, u'mLatitude': 37.727916200000003, u'mTime': 1437669364156.0})(4), prev_pt (AttrDict({'speed': 15.055191127656343, u'mLongitude': -122.4494797, u'mLatitude': 37.714812999999999, u'mTime': 1437669240343.0})) speed = 12.5688015154
currSpeed < 50, retaining index 4 
while considering point AttrDict({'speed': 235.09103649594061, u'mLongitude': -122.38693429999999, u'mLatitude': 37.600318999999999, u'mTime': 1437669428087.0})(5), prev_pt (AttrDict({'speed': 500.71317660657405, u'mLongitude': -122.44326460000001, u'mLatitude': 37.727916200000003, u'mTime': 1437669364156.0})) speed = 235.091036496
currSpeed > 50, removing index 5 
while considering point AttrDict({'speed': 0.00039919882482724163, u'mLongitude': -122.3869339, u'mLatitude': 37.600321000000001, u'mTime': 1437669992128.0})(6), prev_pt (AttrDict({'speed': 500.71317660657405, u'mLongitude': -122.44326460000001, u'mLatitude': 37.727916200000003, u'mTime': 1437669364156.0})) speed = 23.9332433755
currSpeed < 50, retaining index 6 
while considering point AttrDict({'speed': 38.963990634576, u'mLongitude': -122.3014936, u'mLatitude': 37.807161999999998, u'mTime': 1437670613132.0})(7), prev_pt (AttrDict({'speed': 0.00039919882482724163, u'mLongitude': -122.3869339, u'mLatitude': 37.600321000000001, u'mTime': 1437669992128.0})) speed = 38.9639906346
currSpeed > 50, removing index 7 
while considering point AttrDict({'speed': 18.749856413275594, u'mLongitude': -122.29544370000001, u'mLatitude': 37.805006599999999, u'mTime': 1437670644228.0})(8), prev_pt (AttrDict({'speed': 0.00039919882482724163, u'mLongitude': -122.3869339, u'mLatitude': 37.600321000000001, u'mTime': 1437669992128.0})) speed = 37.0209132447
currSpeed > 50, removing index 8 
while considering point AttrDict({'speed': 3.6537232051843778, u'mLongitude': -122.2942517, u'mLatitude': 37.8046145, u'mTime': 1437670675275.0})(9), prev_pt (AttrDict({'speed': 0.00039919882482724163, u'mLongitude': -122.3869339, u'mLatitude': 37.600321000000001, u'mTime': 1437669992128.0})) speed = 35.3298349686
currSpeed > 50, removing index 9 

In [107]:
boundary_retained_df = area_of_interest.loc[boundary_retained[0]]
m = folium.Map([boundary_retained_df.mLatitude.mean(), boundary_retained_df.mLongitude.mean()])
m.div_markers(boundary_retained_df[["mLatitude", "mLongitude"]].as_matrix().tolist(), lo.df_to_string_list(boundary_retained_df))
m.line(boundary_retained_df[["mLatitude", "mLongitude"]].as_matrix().tolist())
ipy.inline_map(m)


Out[107]:

In [108]:
import math

In [109]:
def smooth_posdap(points_df, maxSpeed = 150):
    quality_segments = []
    curr_segment = []
    prev_pt = None
   
    for (i, pt) in enumerate(points_df.to_dict('records')):
        pt = AttrDict(pt)
        if prev_pt is None:
            # Don't have enough data yet, so don't make any decisions
            prev_pt = pt
        else:
            currSpeed = calSpeed(prev_pt, pt)
            print("while considering point %s, speed = %s" % (i, currSpeed))
            # Should make this configurable
            if currSpeed > maxSpeed:
                print("currSpeed > %d, starting new quality segment at index %s " % (maxSpeed, i))
                quality_segments.append(curr_segment)
                curr_segment = []
            else:
                print("currSpeed < %d, retaining index %s in existing quality segment " % (maxSpeed, i))
            prev_pt = pt
            curr_segment.append(i)
    # Append the last segment once we are at the end
    quality_segments.append(curr_segment)

    print("Number of quality segments is %d" % len(quality_segments))

    last_segment = quality_segments[0]
    removed_indices = []
    for curr_segment in quality_segments[1:]:
        print("Considering segments %s and %s" % (last_segment, curr_segment))
        get_coords = lambda(i): [points_df.iloc[i]["mLongitude"], points_df.iloc[i]["mLatitude"]]
        get_ts = lambda(i): (points_df.iloc[i]["mTime"]/1000)
        # I don't know why they would use time instead of distance, but
        # this is what the existing POSDAP code does.
        print("About to compare curr_segment duration %s with last segment duration %s" %
                        (get_ts(curr_segment[-1]) - get_ts(curr_segment[0]),
                         get_ts(last_segment[-1]) - get_ts(last_segment[0])))
        if (get_ts(curr_segment[-1]) - get_ts(curr_segment[0]) <=
            get_ts(last_segment[-1]) - get_ts(last_segment[0])):
            print("curr segment %s is shorter, cut it" % curr_segment)
            ref_idx = last_segment[-1]
            for curr_idx in curr_segment:
                print("Comparing distance %s with speed %s * time %s = %s" %
                    (math.fabs(ec.calDistance(get_coords(ref_idx), get_coords(curr_idx))),
                     maxSpeed, abs(get_ts(ref_idx) - get_ts(curr_idx)),
                     maxSpeed * abs(get_ts(ref_idx) - get_ts(curr_idx))))

                if (math.fabs(ec.calDistance(get_coords(ref_idx), get_coords(curr_idx))) >
                    (maxSpeed * abs(get_ts(ref_idx) - get_ts(curr_idx)))):
                    print("Distance is greater than max speed * time, deleting %s" % curr_idx)
                    removed_indices.append(curr_idx)
        else:
            print("prev segment %s is shorter, cut it" % last_segment)
            ref_idx = curr_segment[-1]
            for curr_idx in reversed(last_segment):
                print("Comparing distance %s with speed %s * time %s = %s" %
                    (math.fabs(ec.calDistance(get_coords(ref_idx), get_coords(curr_idx))),
                     maxSpeed, abs(get_ts(ref_idx) - get_ts(curr_idx)),
                     maxSpeed * abs(get_ts(ref_idx) - get_ts(curr_idx))))
                if (abs(ec.calDistance(get_coords(ref_idx), get_coords(curr_idx))) >
                    (maxSpeed *  abs(get_ts(ref_idx) - get_ts(curr_idx)))):
                    print("Distance is greater than max speed * time, deleting %s" % curr_idx)
                    removed_indices.append(curr_idx)
        last_segment = curr_segment

    retained_indices = points_df.index.delete(removed_indices)
    return (retained_indices, removed_indices)

In [110]:
posdap_retained = smooth_posdap(area_of_interest, maxSpeed=100)
print posdap_retained
posdap_retained_df = area_of_interest.loc[posdap_retained[0]]
m = folium.Map([posdap_retained_df.mLatitude.mean(), posdap_retained_df.mLongitude.mean()])
m.div_markers(posdap_retained_df[["mLatitude", "mLongitude"]].as_matrix().tolist(), lo.df_to_string_list(posdap_retained_df))
m.line(posdap_retained_df[["mLatitude", "mLongitude"]].as_matrix().tolist())
ipy.inline_map(m)


while considering point 1, speed = 17.4667719029
currSpeed < 100, retaining index 1 in existing quality segment 
while considering point 2, speed = 15.0551911277
currSpeed < 100, retaining index 2 in existing quality segment 
while considering point 3, speed = 147.878262561
currSpeed > 100, starting new quality segment at index 3 
while considering point 4, speed = 500.713176607
currSpeed > 100, starting new quality segment at index 4 
while considering point 5, speed = 235.091036496
currSpeed > 100, starting new quality segment at index 5 
while considering point 6, speed = 0.000399198824827
currSpeed < 100, retaining index 6 in existing quality segment 
while considering point 7, speed = 38.9639906346
currSpeed < 100, retaining index 7 in existing quality segment 
while considering point 8, speed = 18.7498564133
currSpeed < 100, retaining index 8 in existing quality segment 
while considering point 9, speed = 3.65372320518
currSpeed < 100, retaining index 9 in existing quality segment 
Number of quality segments is 4
Considering segments [1, 2] and [3]
About to compare curr_segment duration 0 with last segment duration 34
curr segment [3] is shorter, cut it
Comparing distance 13870.5373934 with speed 100 * time 94 = 9400
Distance is greater than max speed * time, deleting 3
Considering segments [3] and [4]
About to compare curr_segment duration 0 with last segment duration 0
curr segment [4] is shorter, cut it
Comparing distance 15029.406709 with speed 100 * time 30 = 3000
Distance is greater than max speed * time, deleting 4
Considering segments [4] and [5, 6, 7, 8, 9]
About to compare curr_segment duration 1247 with last segment duration 0
prev segment [4] is shorter, cut it
Comparing distance 15630.2116469 with speed 100 * time 1311 = 131100
(Int64Index([65, 66, 67, 70, 71, 72, 73, 74], dtype='int64'), [3, 4])
Out[110]:

In [111]:
posdap_retained = smooth_posdap(area_of_interest, maxSpeed=35)
print posdap_retained
posdap_retained_df = area_of_interest.loc[posdap_retained[0]]
m = folium.Map([posdap_retained_df.mLatitude.mean(), posdap_retained_df.mLongitude.mean()])
m.div_markers(posdap_retained_df[["mLatitude", "mLongitude"]].as_matrix().tolist(), lo.df_to_string_list(posdap_retained_df))
m.line(posdap_retained_df[["mLatitude", "mLongitude"]].as_matrix().tolist())
ipy.inline_map(m)


while considering point 1, speed = 17.4667719029
currSpeed < 35, retaining index 1 in existing quality segment 
while considering point 2, speed = 15.0551911277
currSpeed < 35, retaining index 2 in existing quality segment 
while considering point 3, speed = 147.878262561
currSpeed > 35, starting new quality segment at index 3 
while considering point 4, speed = 500.713176607
currSpeed > 35, starting new quality segment at index 4 
while considering point 5, speed = 235.091036496
currSpeed > 35, starting new quality segment at index 5 
while considering point 6, speed = 0.000399198824827
currSpeed < 35, retaining index 6 in existing quality segment 
while considering point 7, speed = 38.9639906346
currSpeed > 35, starting new quality segment at index 7 
while considering point 8, speed = 18.7498564133
currSpeed < 35, retaining index 8 in existing quality segment 
while considering point 9, speed = 3.65372320518
currSpeed < 35, retaining index 9 in existing quality segment 
Number of quality segments is 5
Considering segments [1, 2] and [3]
About to compare curr_segment duration 0 with last segment duration 34
curr segment [3] is shorter, cut it
Comparing distance 13870.5373934 with speed 35 * time 94 = 3290
Distance is greater than max speed * time, deleting 3
Considering segments [3] and [4]
About to compare curr_segment duration 0 with last segment duration 0
curr segment [4] is shorter, cut it
Comparing distance 15029.406709 with speed 35 * time 30 = 1050
Distance is greater than max speed * time, deleting 4
Considering segments [4] and [5, 6]
About to compare curr_segment duration 564 with last segment duration 0
prev segment [4] is shorter, cut it
Comparing distance 15029.406709 with speed 35 * time 628 = 21980
Considering segments [5, 6] and [7, 8, 9]
About to compare curr_segment duration 62 with last segment duration 564
curr segment [7, 8, 9] is shorter, cut it
Comparing distance 24196.79404 with speed 35 * time 621 = 21735
Distance is greater than max speed * time, deleting 7
Comparing distance 24141.3375268 with speed 35 * time 652 = 22820
Distance is greater than max speed * time, deleting 8
Comparing distance 24135.4707693 with speed 35 * time 683 = 23905
Distance is greater than max speed * time, deleting 9
(Int64Index([65, 66, 67, 70, 71], dtype='int64'), [3, 4, 7, 8, 9])
Out[111]:

In [112]:
def smooth_posdap(points_df, maxSpeed = 150):
    quality_segments = []
    curr_segment = []
    prev_pt = None
   
    for (i, pt) in enumerate(points_df.to_dict('records')):
        pt = AttrDict(pt)
        if prev_pt is None:
            # Don't have enough data yet, so don't make any decisions
            prev_pt = pt
        else:
            currSpeed = calSpeed(prev_pt, pt)
            print("while considering point %s, speed = %s" % (i, currSpeed))
            # Should make this configurable
            if currSpeed > maxSpeed:
                print("currSpeed > %d, starting new quality segment at index %s " % (maxSpeed, i))
                quality_segments.append(curr_segment)
                curr_segment = []
            else:
                print("currSpeed < %d, retaining index %s in existing quality segment " % (maxSpeed, i))
            prev_pt = pt
            curr_segment.append(i)
    # Append the last segment once we are at the end
    quality_segments.append(curr_segment)

    print("Number of quality segments is %d" % len(quality_segments))

    last_segment = quality_segments[0]
    removed_indices = []
    for curr_segment in quality_segments[1:]:
        print("Considering segments %s and %s" % (last_segment, curr_segment))
        get_coords = lambda(i): [points_df.iloc[i]["mLongitude"], points_df.iloc[i]["mLatitude"]]
        get_ts = lambda(i): (points_df.iloc[i]["mTime"]/1000)
        # I don't know why they would use time instead of distance, but
        # this is what the existing POSDAP code does.
        print("About to compare curr_segment duration %s with last segment duration %s" %
                        (get_ts(curr_segment[-1]) - get_ts(curr_segment[0]),
                         get_ts(last_segment[-1]) - get_ts(last_segment[0])))
        if (get_ts(curr_segment[-1]) - get_ts(curr_segment[0]) <=
            get_ts(last_segment[-1]) - get_ts(last_segment[0])):
            print("curr segment %s is shorter, cut it" % curr_segment)
            ref_idx = last_segment[-1]
            for curr_idx in curr_segment:
                print("Comparing distance %s with speed %s * time %s = %s" %
                    (math.fabs(ec.calDistance(get_coords(ref_idx), get_coords(curr_idx))),
                     maxSpeed, abs(get_ts(ref_idx) - get_ts(curr_idx)),
                     maxSpeed * abs(get_ts(ref_idx) - get_ts(curr_idx))))

                if (math.fabs(ec.calDistance(get_coords(ref_idx), get_coords(curr_idx))) >
                    (maxSpeed * abs(get_ts(ref_idx) - get_ts(curr_idx)))):
                    print("Distance is greater than max speed * time, deleting %s" % curr_idx)
                    removed_indices.append(curr_idx)
        else:
            print("prev segment %s is shorter, cut it" % last_segment)
            ref_idx = curr_segment[-1]
            for curr_idx in reversed(last_segment):
                print("Comparing distance %s with speed %s * time %s = %s" %
                    (math.fabs(ec.calDistance(get_coords(ref_idx), get_coords(curr_idx))),
                     maxSpeed, abs(get_ts(ref_idx) - get_ts(curr_idx)),
                     maxSpeed * abs(get_ts(ref_idx) - get_ts(curr_idx))))
                if (abs(ec.calDistance(get_coords(ref_idx), get_coords(curr_idx))) >
                    (maxSpeed *  abs(get_ts(ref_idx) - get_ts(curr_idx)))):
                    print("Distance is greater than max speed * time, deleting %s" % curr_idx)
                    removed_indices.append(curr_idx)
            last_segment = curr_segment

    retained_indices = points_df.index.delete(removed_indices)
    return (retained_indices, removed_indices)

In [113]:
posdap_retained = smooth_posdap(area_of_interest, maxSpeed=35)
print posdap_retained
posdap_retained_df = area_of_interest.loc[posdap_retained[0]]
m = folium.Map([posdap_retained_df.mLatitude.mean(), posdap_retained_df.mLongitude.mean()])
m.div_markers(posdap_retained_df[["mLatitude", "mLongitude"]].as_matrix().tolist(), lo.df_to_string_list(posdap_retained_df))
m.line(posdap_retained_df[["mLatitude", "mLongitude"]].as_matrix().tolist())
ipy.inline_map(m)


while considering point 1, speed = 17.4667719029
currSpeed < 35, retaining index 1 in existing quality segment 
while considering point 2, speed = 15.0551911277
currSpeed < 35, retaining index 2 in existing quality segment 
while considering point 3, speed = 147.878262561
currSpeed > 35, starting new quality segment at index 3 
while considering point 4, speed = 500.713176607
currSpeed > 35, starting new quality segment at index 4 
while considering point 5, speed = 235.091036496
currSpeed > 35, starting new quality segment at index 5 
while considering point 6, speed = 0.000399198824827
currSpeed < 35, retaining index 6 in existing quality segment 
while considering point 7, speed = 38.9639906346
currSpeed > 35, starting new quality segment at index 7 
while considering point 8, speed = 18.7498564133
currSpeed < 35, retaining index 8 in existing quality segment 
while considering point 9, speed = 3.65372320518
currSpeed < 35, retaining index 9 in existing quality segment 
Number of quality segments is 5
Considering segments [1, 2] and [3]
About to compare curr_segment duration 0 with last segment duration 34
curr segment [3] is shorter, cut it
Comparing distance 13870.5373934 with speed 35 * time 94 = 3290
Distance is greater than max speed * time, deleting 3
Considering segments [1, 2] and [4]
About to compare curr_segment duration 0 with last segment duration 34
curr segment [4] is shorter, cut it
Comparing distance 1556.18102202 with speed 35 * time 124 = 4340
Considering segments [1, 2] and [5, 6]
About to compare curr_segment duration 564 with last segment duration 34
prev segment [1, 2] is shorter, cut it
Comparing distance 13870.5373934 with speed 35 * time 752 = 26320
Comparing distance 13664.2820654 with speed 35 * time 786 = 27510
Considering segments [5, 6] and [7, 8, 9]
About to compare curr_segment duration 62 with last segment duration 564
curr segment [7, 8, 9] is shorter, cut it
Comparing distance 24196.79404 with speed 35 * time 621 = 21735
Distance is greater than max speed * time, deleting 7
Comparing distance 24141.3375268 with speed 35 * time 652 = 22820
Distance is greater than max speed * time, deleting 8
Comparing distance 24135.4707693 with speed 35 * time 683 = 23905
Distance is greater than max speed * time, deleting 9
(Int64Index([65, 66, 67, 69, 70, 71], dtype='int64'), [3, 7, 8, 9])
Out[113]:

In [114]:
from sklearn import datasets
X, y, coef = datasets.make_regression(n_samples=10, n_features=1,
                                      n_informative=1, noise=10,
                                      coef=True, random_state=0)
print X, y, coef


[[ 0.40015721]
 [-0.10321885]
 [ 0.4105985 ]
 [-0.97727788]
 [ 0.97873798]
 [ 0.95008842]
 [ 2.2408932 ]
 [-0.15135721]
 [ 1.86755799]
 [ 1.76405235]] [  42.48956723  -17.77964105   36.27738121  -63.27149035   82.33225832
   71.47616194  183.20853013   -9.23134599  146.04341636  165.11644886] 79.1725038083

In [115]:
from sklearn import linear_model
latArr = [[lat] for lat in area_of_interest.mLatitude.as_matrix()]; latArr


Out[115]:
[[37.710586300000003],
 [37.711364099999997],
 [37.714812999999999],
 [37.600321000000001],
 [37.727916200000003],
 [37.600318999999999],
 [37.600321000000001],
 [37.807161999999998],
 [37.805006599999999],
 [37.8046145]]

In [116]:
lngArr = area_of_interest.mLongitude.as_matrix()

In [117]:
model_ransac = linear_model.RANSACRegressor(linear_model.LinearRegression())
model_ransac.fit(latArr, lngArr)
inlier_mask = model_ransac.inlier_mask_
print inlier_mask


[ True  True  True False  True False False  True  True  True]

In [118]:
area_of_interest[inlier_mask]


Out[118]:
mLatitude mLongitude speed mTime formatted_time
65 37.710586 -122.466058 6.079706 1437669142163 2015-07-23 09:32:22
66 37.711364 -122.453417 17.466772 1437669206020 2015-07-23 09:33:26
67 37.714813 -122.449480 15.055191 1437669240343 2015-07-23 09:34:00
69 37.727916 -122.443265 500.713177 1437669364156 2015-07-23 09:36:04
72 37.807162 -122.301494 38.963991 1437670613132 2015-07-23 09:56:53
73 37.805007 -122.295444 18.749856 1437670644228 2015-07-23 09:57:24
74 37.804614 -122.294252 3.653723 1437670675275 2015-07-23 09:57:55

In [119]:
ransac_retained_df = area_of_interest.loc[inlier_mask]
m = folium.Map([ransac_retained_df.mLatitude.mean(), ransac_retained_df.mLongitude.mean()])
m.div_markers(ransac_retained_df[["mLatitude", "mLongitude"]].as_matrix().tolist(), lo.df_to_string_list(ransac_retained_df))
m.line(ransac_retained_df[["mLatitude", "mLongitude"]].as_matrix().tolist())
ipy.inline_map(m)


Out[119]:

In [120]:
my_time_filter_map_list_21 = get_filter_compare(lq.get_uuid_list()[0], "time", get_jul_dt(21), get_jul_dt(22))
ipy.inline_maps(my_time_filter_map_list_21, len(my_time_filter_map_list_21), 2)


final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1437465600000.0}}, {'data.mTime': {'$lt': 1437552000000.0}}], 'user_id': UUID('0763de67-f61e-3f5d-90e7-518e69793954'), 'metadata.filter': 'time'} 
original split indices are Int64Index([0, 484, 494, 523, 641, 923, 935, 1030, 1040, 1080, 1091, 1104, 1114], dtype='int64')
0      2015-07-21 08:22:15
484    2015-07-21 12:30:27
494    2015-07-21 13:21:30
523    2015-07-21 13:30:29
641    2015-07-21 14:41:11
923    2015-07-21 15:46:13
935    2015-07-21 15:54:49
1030   2015-07-21 17:38:30
1040   2015-07-21 18:07:49
1080   2015-07-21 18:19:38
1091   2015-07-21 18:47:23
1104   2015-07-21 19:00:12
1114   2015-07-21 19:02:22
Name: formatted_time, dtype: datetime64[ns]
Considering trip from 2015-07-21 08:22:15 to 2015-07-21 12:30:27 because start = 0 and end = 484
Considering trip from 2015-07-21 12:30:27 to 2015-07-21 13:21:30 because start = 484 and end = 494
Considering trip from 2015-07-21 13:21:30 to 2015-07-21 13:30:29 because start = 494 and end = 523
Considering trip from 2015-07-21 13:30:29 to 2015-07-21 14:41:11 because start = 523 and end = 641
Considering trip from 2015-07-21 14:41:11 to 2015-07-21 15:46:13 because start = 641 and end = 923
Considering trip from 2015-07-21 15:46:13 to 2015-07-21 15:54:49 because start = 923 and end = 935
Considering trip from 2015-07-21 15:54:49 to 2015-07-21 17:38:30 because start = 935 and end = 1030
Considering trip from 2015-07-21 17:38:30 to 2015-07-21 18:07:49 because start = 1030 and end = 1040
Considering trip from 2015-07-21 18:07:49 to 2015-07-21 18:19:38 because start = 1040 and end = 1080
Considering trip from 2015-07-21 18:19:38 to 2015-07-21 18:47:23 because start = 1080 and end = 1091
Considering trip from 2015-07-21 18:47:23 to 2015-07-21 19:00:12 because start = 1091 and end = 1104
Considering trip from 2015-07-21 19:00:12 to 2015-07-21 19:02:22 because start = 1104 and end = 1114
final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1437465600000.0}}, {'data.mTime': {'$lt': 1437552000000.0}}], 'user_id': UUID('0763de67-f61e-3f5d-90e7-518e69793954'), 'metadata.filter': 'time'} 
filtering points Int64Index([239, 484, 523, 532, 561, 564, 702, 796, 797, 798, 815, 833, 855, 856, 857, 858, 859, 860, 923, 924, 928, 1003, 1030, 1104], dtype='int64')
filtered list size went from (1115, 28) to (1091, 28)
speedThreshold = 78
Found 11 potential outliers, list = [129 439 442 531 537 562 585 757 763 784 808]
Only one candidate, cluster centers are [array([129, 439, 442, 531, 537, 562, 585, 757, 763, 784, 808])]
Considering candidate cluster center [129 439 442 531 537 562 585 757 763 784 808]
lowRange = max(124, 0) = 124 and highRange = max(134, 1091) = 134
Area size = 11, index = Int64Index([124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134], dtype='int64') with size 11
Deleted 0 points through ransac filtering
Retain mask is of size 11
Accuracy filtered df shape is (1091, 28), ransac_mask size = 1091
filtered split indices are Int64Index([0, 483, 492, 521, 635, 905, 914, 1008, 1017, 1057, 1068, 1081, 1090], dtype='int64')
0      2015-07-21 08:22:15
483    2015-07-21 09:47:41
492    2015-07-21 12:31:12
521    2015-07-21 13:25:22
635    2015-07-21 13:48:09
905    2015-07-21 15:34:08
914    2015-07-21 15:35:08
1008   2015-07-21 16:10:59
1017   2015-07-21 16:13:07
1057   2015-07-21 18:09:37
1068   2015-07-21 18:12:18
1081   2015-07-21 18:20:25
1090   2015-07-21 18:21:27
Name: formatted_time, dtype: datetime64[ns]
Considering trip from 2015-07-21 08:22:15 to 2015-07-21 12:30:31 because start = 0 and end = 483
Considering trip from 2015-07-21 12:30:31 to 2015-07-21 13:21:30 because start = 483 and end = 492
Considering trip from 2015-07-21 13:21:30 to 2015-07-21 13:30:32 because start = 492 and end = 521
Considering trip from 2015-07-21 13:30:32 to 2015-07-21 14:41:11 because start = 521 and end = 635
Considering trip from 2015-07-21 14:41:11 to 2015-07-21 15:47:39 because start = 635 and end = 905
Considering trip from 2015-07-21 15:47:39 to 2015-07-21 15:54:49 because start = 905 and end = 914
Considering trip from 2015-07-21 15:54:49 to 2015-07-21 17:38:34 because start = 914 and end = 1008
Considering trip from 2015-07-21 17:38:34 to 2015-07-21 18:07:49 because start = 1008 and end = 1017
Considering trip from 2015-07-21 18:07:49 to 2015-07-21 18:19:38 because start = 1017 and end = 1057
Considering trip from 2015-07-21 18:19:38 to 2015-07-21 18:47:23 because start = 1057 and end = 1068
Considering trip from 2015-07-21 18:47:23 to 2015-07-21 19:01:12 because start = 1068 and end = 1081
Considering trip from 2015-07-21 19:01:12 to 2015-07-21 19:02:22 because start = 1081 and end = 1090
Out[120]:

In [121]:
my_dist_filter_map_list_21 = get_filter_compare(lq.get_uuid_list()[0], "distance", get_jul_dt(21), get_jul_dt(22))
ipy.inline_maps(my_dist_filter_map_list_21, len(my_dist_filter_map_list_21), 2)


final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1437465600000.0}}, {'data.mTime': {'$lt': 1437552000000.0}}], 'user_id': UUID('0763de67-f61e-3f5d-90e7-518e69793954'), 'metadata.filter': 'distance'} 
original split indices are Int64Index([0, 1, 103, 106, 112, 145, 213, 216, 235, 237, 245, 247, 251, 252], dtype='int64')
0     2015-07-21 08:22:15
1     2015-07-21 08:46:30
103   2015-07-21 12:30:27
106   2015-07-21 13:21:30
112   2015-07-21 13:30:29
145   2015-07-21 14:41:11
213   2015-07-21 15:46:13
216   2015-07-21 15:54:49
235   2015-07-21 17:38:30
237   2015-07-21 18:07:49
245   2015-07-21 18:19:38
247   2015-07-21 18:47:23
251   2015-07-21 19:00:12
252   2015-07-21 19:01:12
Name: formatted_time, dtype: datetime64[ns]
Considering trip from 2015-07-21 08:22:15 to 2015-07-21 08:46:30 because start = 0 and end = 1
Ignoring trip from 2015-07-21 08:22:15 to 2015-07-21 08:46:30 because start = 0 and end = 1
Considering trip from 2015-07-21 08:46:30 to 2015-07-21 12:30:27 because start = 1 and end = 103
Considering trip from 2015-07-21 12:30:27 to 2015-07-21 13:21:30 because start = 103 and end = 106
Ignoring trip from 2015-07-21 12:30:27 to 2015-07-21 13:21:30 because start = 103 and end = 106
Considering trip from 2015-07-21 13:21:30 to 2015-07-21 13:30:29 because start = 106 and end = 112
Considering trip from 2015-07-21 13:30:29 to 2015-07-21 14:41:11 because start = 112 and end = 145
Considering trip from 2015-07-21 14:41:11 to 2015-07-21 15:46:13 because start = 145 and end = 213
Considering trip from 2015-07-21 15:46:13 to 2015-07-21 15:54:49 because start = 213 and end = 216
Ignoring trip from 2015-07-21 15:46:13 to 2015-07-21 15:54:49 because start = 213 and end = 216
Considering trip from 2015-07-21 15:54:49 to 2015-07-21 17:38:30 because start = 216 and end = 235
Considering trip from 2015-07-21 17:38:30 to 2015-07-21 18:07:49 because start = 235 and end = 237
Ignoring trip from 2015-07-21 17:38:30 to 2015-07-21 18:07:49 because start = 235 and end = 237
Considering trip from 2015-07-21 18:07:49 to 2015-07-21 18:19:38 because start = 237 and end = 245
Considering trip from 2015-07-21 18:19:38 to 2015-07-21 18:47:23 because start = 245 and end = 247
Ignoring trip from 2015-07-21 18:19:38 to 2015-07-21 18:47:23 because start = 245 and end = 247
Considering trip from 2015-07-21 18:47:23 to 2015-07-21 19:00:12 because start = 247 and end = 251
Considering trip from 2015-07-21 19:00:12 to 2015-07-21 19:01:12 because start = 251 and end = 252
Ignoring trip from 2015-07-21 19:00:12 to 2015-07-21 19:01:12 because start = 251 and end = 252
final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1437465600000.0}}, {'data.mTime': {'$lt': 1437552000000.0}}], 'user_id': UUID('0763de67-f61e-3f5d-90e7-518e69793954'), 'metadata.filter': 'distance'} 
filtering points Int64Index([48, 103, 112, 116, 161, 190, 194, 213, 214, 229, 235, 251], dtype='int64')
filtered list size went from (253, 28) to (241, 28)
speedThreshold = 41
Found 3 potential outliers, list = [114 169 177]
Only one candidate, cluster centers are [array([114, 169, 177])]
Considering candidate cluster center [114 169 177]
lowRange = max(109, 0) = 109 and highRange = max(119, 241) = 119
Area size = 11, index = Int64Index([109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119], dtype='int64') with size 11
Deleted 0 points through ransac filtering
Retain mask is of size 11
Accuracy filtered df shape is (241, 28), ransac_mask size = 241
filtered split indices are Int64Index([0, 1, 102, 104, 110, 141, 206, 207, 225, 226, 234, 236, 240, 240], dtype='int64')
0     2015-07-21 08:22:15
1     2015-07-21 08:46:30
102   2015-07-21 09:46:40
104   2015-07-21 12:30:30
110   2015-07-21 13:24:07
141   2015-07-21 13:45:43
206   2015-07-21 15:31:39
207   2015-07-21 15:32:35
225   2015-07-21 16:06:00
226   2015-07-21 16:06:46
234   2015-07-21 16:13:22
236   2015-07-21 17:38:34
240   2015-07-21 18:09:10
240   2015-07-21 18:09:10
Name: formatted_time, dtype: datetime64[ns]
Considering trip from 2015-07-21 08:22:15 to 2015-07-21 08:46:30 because start = 0 and end = 1
Ignoring trip from 2015-07-21 08:22:15 to 2015-07-21 08:46:30 because start = 0 and end = 1
Considering trip from 2015-07-21 08:46:30 to 2015-07-21 12:30:30 because start = 1 and end = 102
Considering trip from 2015-07-21 12:30:30 to 2015-07-21 13:21:30 because start = 102 and end = 104
Ignoring trip from 2015-07-21 12:30:30 to 2015-07-21 13:21:30 because start = 102 and end = 104
Considering trip from 2015-07-21 13:21:30 to 2015-07-21 13:30:32 because start = 104 and end = 110
Considering trip from 2015-07-21 13:30:32 to 2015-07-21 14:41:11 because start = 110 and end = 141
Considering trip from 2015-07-21 14:41:11 to 2015-07-21 15:47:39 because start = 141 and end = 206
Considering trip from 2015-07-21 15:47:39 to 2015-07-21 15:54:49 because start = 206 and end = 207
Ignoring trip from 2015-07-21 15:47:39 to 2015-07-21 15:54:49 because start = 206 and end = 207
Considering trip from 2015-07-21 15:54:49 to 2015-07-21 17:38:34 because start = 207 and end = 225
Considering trip from 2015-07-21 17:38:34 to 2015-07-21 18:07:49 because start = 225 and end = 226
Ignoring trip from 2015-07-21 17:38:34 to 2015-07-21 18:07:49 because start = 225 and end = 226
Considering trip from 2015-07-21 18:07:49 to 2015-07-21 18:19:38 because start = 226 and end = 234
Considering trip from 2015-07-21 18:19:38 to 2015-07-21 18:47:23 because start = 234 and end = 236
Ignoring trip from 2015-07-21 18:19:38 to 2015-07-21 18:47:23 because start = 234 and end = 236
Considering trip from 2015-07-21 18:47:23 to 2015-07-21 19:01:12 because start = 236 and end = 240
Considering trip from 2015-07-21 19:01:12 to 2015-07-21 19:01:12 because start = 240 and end = 240
Ignoring trip from 2015-07-21 19:01:12 to 2015-07-21 19:01:12 because start = 240 and end = 240
Out[121]:

In [122]:
my_dist_filter_map_list_22 = get_filter_compare(lq.get_uuid_list()[0], "distance", get_jul_dt(22), get_jul_dt(23))
ipy.inline_maps(my_dist_filter_map_list_22, len(my_dist_filter_map_list_22), 2)


final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1437552000000.0}}, {'data.mTime': {'$lt': 1437638400000.0}}], 'user_id': UUID('0763de67-f61e-3f5d-90e7-518e69793954'), 'metadata.filter': 'distance'} 
original split indices are Int64Index([0, 59, 87, 90, 122, 128, 169, 172, 191, 195, 197], dtype='int64')
0     2015-07-22 08:14:53
59    2015-07-22 09:09:00
87    2015-07-22 09:26:56
90    2015-07-22 13:16:38
122   2015-07-22 14:41:15
128   2015-07-22 14:51:49
169   2015-07-22 16:02:29
172   2015-07-22 16:08:17
191   2015-07-22 17:51:17
195   2015-07-22 18:09:52
197   2015-07-22 18:14:22
Name: formatted_time, dtype: datetime64[ns]
Considering trip from 2015-07-22 08:14:53 to 2015-07-22 09:09:00 because start = 0 and end = 59
Considering trip from 2015-07-22 09:09:00 to 2015-07-22 09:26:56 because start = 59 and end = 87
Considering trip from 2015-07-22 09:26:56 to 2015-07-22 13:16:38 because start = 87 and end = 90
Ignoring trip from 2015-07-22 09:26:56 to 2015-07-22 13:16:38 because start = 87 and end = 90
Considering trip from 2015-07-22 13:16:38 to 2015-07-22 14:41:15 because start = 90 and end = 122
Considering trip from 2015-07-22 14:41:15 to 2015-07-22 14:51:49 because start = 122 and end = 128
Considering trip from 2015-07-22 14:51:49 to 2015-07-22 16:02:29 because start = 128 and end = 169
Considering trip from 2015-07-22 16:02:29 to 2015-07-22 16:08:17 because start = 169 and end = 172
Ignoring trip from 2015-07-22 16:02:29 to 2015-07-22 16:08:17 because start = 169 and end = 172
Considering trip from 2015-07-22 16:08:17 to 2015-07-22 17:51:17 because start = 172 and end = 191
Considering trip from 2015-07-22 17:51:17 to 2015-07-22 18:09:52 because start = 191 and end = 195
Considering trip from 2015-07-22 18:09:52 to 2015-07-22 18:14:22 because start = 195 and end = 197
Ignoring trip from 2015-07-22 18:09:52 to 2015-07-22 18:14:22 because start = 195 and end = 197
final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1437552000000.0}}, {'data.mTime': {'$lt': 1437638400000.0}}], 'user_id': UUID('0763de67-f61e-3f5d-90e7-518e69793954'), 'metadata.filter': 'distance'} 
filtering points Int64Index([19, 20, 25, 30, 61, 81, 108, 115, 129, 138, 169, 172, 174, 191, 192], dtype='int64')
filtered list size went from (198, 28) to (183, 28)
speedThreshold = 37
Found 2 potential outliers, list = [61 63]
Only one candidate, cluster centers are [array([61, 63])]
Considering candidate cluster center [61 63]
lowRange = max(56, 0) = 56 and highRange = max(66, 183) = 66
Area size = 11, index = Int64Index([56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66], dtype='int64') with size 11
Deleted 0 points through ransac filtering
Retain mask is of size 11
Accuracy filtered df shape is (183, 28), ransac_mask size = 183
filtered split indices are Int64Index([0, 55, 81, 84, 114, 120, 159, 161, 178, 180, 182], dtype='int64')
0     2015-07-22 08:14:53
55    2015-07-22 08:59:22
81    2015-07-22 09:14:50
84    2015-07-22 09:20:04
114   2015-07-22 13:45:43
120   2015-07-22 13:50:43
159   2015-07-22 15:27:18
161   2015-07-22 15:30:54
178   2015-07-22 16:12:20
180   2015-07-22 16:13:50
182   2015-07-22 16:15:21
Name: formatted_time, dtype: datetime64[ns]
Considering trip from 2015-07-22 08:14:53 to 2015-07-22 09:09:00 because start = 0 and end = 55
Considering trip from 2015-07-22 09:09:00 to 2015-07-22 09:26:56 because start = 55 and end = 81
Considering trip from 2015-07-22 09:26:56 to 2015-07-22 13:16:38 because start = 81 and end = 84
Ignoring trip from 2015-07-22 09:26:56 to 2015-07-22 13:16:38 because start = 81 and end = 84
Considering trip from 2015-07-22 13:16:38 to 2015-07-22 14:41:15 because start = 84 and end = 114
Considering trip from 2015-07-22 14:41:15 to 2015-07-22 14:51:49 because start = 114 and end = 120
Considering trip from 2015-07-22 14:51:49 to 2015-07-22 16:02:32 because start = 120 and end = 159
Considering trip from 2015-07-22 16:02:32 to 2015-07-22 16:08:20 because start = 159 and end = 161
Ignoring trip from 2015-07-22 16:02:32 to 2015-07-22 16:08:20 because start = 159 and end = 161
Considering trip from 2015-07-22 16:08:20 to 2015-07-22 17:54:21 because start = 161 and end = 178
Considering trip from 2015-07-22 17:54:21 to 2015-07-22 18:09:52 because start = 178 and end = 180
Ignoring trip from 2015-07-22 17:54:21 to 2015-07-22 18:09:52 because start = 178 and end = 180
Considering trip from 2015-07-22 18:09:52 to 2015-07-22 18:14:22 because start = 180 and end = 182
Ignoring trip from 2015-07-22 18:09:52 to 2015-07-22 18:14:22 because start = 180 and end = 182
Out[122]:

In [123]:
my_time_filter_map_list_22 = get_filter_compare(lq.get_uuid_list()[0], "time", get_jul_dt(22), get_jul_dt(23))
ipy.inline_maps(my_time_filter_map_list_22, len(my_time_filter_map_list_22), 2)


final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1437552000000.0}}, {'data.mTime': {'$lt': 1437638400000.0}}], 'user_id': UUID('0763de67-f61e-3f5d-90e7-518e69793954'), 'metadata.filter': 'time'} 
original split indices are Int64Index([0, 181, 257, 372, 431, 432], dtype='int64')
0     2015-07-22 08:14:53
181   2015-07-22 13:16:38
257   2015-07-22 14:41:15
372   2015-07-22 16:02:29
431   2015-07-22 17:51:17
432   2015-07-22 17:54:21
Name: formatted_time, dtype: datetime64[ns]
Considering trip from 2015-07-22 08:14:53 to 2015-07-22 13:16:38 because start = 0 and end = 181
Considering trip from 2015-07-22 13:16:38 to 2015-07-22 14:41:15 because start = 181 and end = 257
Considering trip from 2015-07-22 14:41:15 to 2015-07-22 16:02:29 because start = 257 and end = 372
Considering trip from 2015-07-22 16:02:29 to 2015-07-22 17:51:17 because start = 372 and end = 431
Considering trip from 2015-07-22 17:51:17 to 2015-07-22 17:54:21 because start = 431 and end = 432
Ignoring trip from 2015-07-22 17:51:17 to 2015-07-22 17:54:21 because start = 431 and end = 432
final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1437552000000.0}}, {'data.mTime': {'$lt': 1437638400000.0}}], 'user_id': UUID('0763de67-f61e-3f5d-90e7-518e69793954'), 'metadata.filter': 'time'} 
filtering points Int64Index([33, 34, 51, 54, 61, 62, 124, 144, 145, 146, 216, 217, 218, 219, 221, 225, 226, 229, 235, 241, 276, 296, 298, 305, 311, 325, 372, 382, 384, 431], dtype='int64')
filtered list size went from (433, 28) to (403, 28)
speedThreshold = 35
Found 5 potential outliers, list = [ 36 122 124 132 136]
Only one candidate, cluster centers are [array([ 36, 122, 124, 132, 136])]
Considering candidate cluster center [ 36 122 124 132 136]
lowRange = max(31, 0) = 31 and highRange = max(41, 403) = 41
Area size = 11, index = Int64Index([31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41], dtype='int64') with size 11
Deleted 2 points through ransac filtering
Retain mask is of size 11
Accuracy filtered df shape is (403, 28), ransac_mask size = 403
filtered split indices are Int64Index([0, 171, 237, 346, 402, 402], dtype='int64')
0     2015-07-22 08:14:53
171   2015-07-22 09:27:57
237   2015-07-22 13:45:57
346   2015-07-22 15:28:54
402   2015-07-22 16:17:21
402   2015-07-22 16:17:21
Name: formatted_time, dtype: datetime64[ns]
Considering trip from 2015-07-22 08:14:53 to 2015-07-22 13:16:38 because start = 0 and end = 171
Considering trip from 2015-07-22 13:16:38 to 2015-07-22 14:41:15 because start = 171 and end = 237
Considering trip from 2015-07-22 14:41:15 to 2015-07-22 16:02:32 because start = 237 and end = 346
Considering trip from 2015-07-22 16:02:32 to 2015-07-22 17:54:21 because start = 346 and end = 402
Considering trip from 2015-07-22 17:54:21 to 2015-07-22 17:54:21 because start = 402 and end = 402
Ignoring trip from 2015-07-22 17:54:21 to 2015-07-22 17:54:21 because start = 402 and end = 402
Out[123]:

In [124]:
tom_time_filter_map_list_22 = get_filter_compare(lq.get_uuid_list()[2], "time", get_jul_dt(22), get_jul_dt(23))
ipy.inline_maps(tom_time_filter_map_list_22, len(tom_time_filter_map_list_22), 2)


final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1437552000000.0}}, {'data.mTime': {'$lt': 1437638400000.0}}], 'user_id': UUID('b0d937d0-70ef-305e-9563-440369012b39'), 'metadata.filter': 'time'} 
original split indices are Int64Index([0, 10, 31, 74, 114, 138, 152, 161], dtype='int64')
0     2015-07-22 08:05:53
10    2015-07-22 08:26:15
31    2015-07-22 08:40:35
74    2015-07-22 17:47:30
114   2015-07-22 18:40:26
138   2015-07-22 23:14:15
152   2015-07-23 00:43:50
161   2015-07-23 00:48:05
Name: formatted_time, dtype: datetime64[ns]
Considering trip from 2015-07-22 08:05:53 to 2015-07-22 08:26:15 because start = 0 and end = 10
Considering trip from 2015-07-22 08:26:15 to 2015-07-22 08:40:35 because start = 10 and end = 31
Considering trip from 2015-07-22 08:40:35 to 2015-07-22 17:47:30 because start = 31 and end = 74
Considering trip from 2015-07-22 17:47:30 to 2015-07-22 18:40:26 because start = 74 and end = 114
Considering trip from 2015-07-22 18:40:26 to 2015-07-22 23:14:15 because start = 114 and end = 138
Considering trip from 2015-07-22 23:14:15 to 2015-07-23 00:43:50 because start = 138 and end = 152
Considering trip from 2015-07-23 00:43:50 to 2015-07-23 00:48:05 because start = 152 and end = 161
final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1437552000000.0}}, {'data.mTime': {'$lt': 1437638400000.0}}], 'user_id': UUID('b0d937d0-70ef-305e-9563-440369012b39'), 'metadata.filter': 'time'} 
filtering points Int64Index([0, 10, 11, 12, 13, 14, 16, 20, 31, 32, 33, 34, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 58, 59, 60, 61, 62, 63, 79, 80, 81, 82, 83, 84, 86, 89, 90, 91, 92, 93, 94, 101, 114, 115, 116, 133, 138, 152], dtype='int64')
filtered list size went from (162, 28) to (106, 28)
speedThreshold = 31
Found 2 potential outliers, list = [45 47]
Only one candidate, cluster centers are [array([45, 47])]
Considering candidate cluster center [45 47]
lowRange = max(40, 0) = 40 and highRange = max(50, 106) = 50
Area size = 11, index = Int64Index([40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50], dtype='int64') with size 11
Deleted 3 points through ransac filtering
Retain mask is of size 11
Accuracy filtered df shape is (106, 28), ransac_mask size = 106
filtered split indices are Int64Index([0, 9, 23, 26, 38, 46, 64, 84, 97, 105], dtype='int64')
0     2015-07-22 08:05:53
9     2015-07-22 08:10:06
23    2015-07-22 08:31:21
26    2015-07-22 08:32:45
38    2015-07-22 08:43:11
46    2015-07-22 08:47:26
64    2015-07-22 08:57:47
84    2015-07-22 17:52:42
97    2015-07-22 17:59:36
105   2015-07-22 18:03:46
Name: formatted_time, dtype: datetime64[ns]
Considering trip from 2015-07-22 08:05:57 to 2015-07-22 08:26:54 because start = 0 and end = 9
Considering trip from 2015-07-22 08:26:54 to 2015-07-22 08:42:08 because start = 9 and end = 23
Considering trip from 2015-07-22 08:42:08 to 2015-07-22 08:53:56 because start = 23 and end = 26
Ignoring trip from 2015-07-22 08:42:08 to 2015-07-22 08:53:56 because start = 23 and end = 26
Considering trip from 2015-07-22 08:53:56 to 2015-07-22 17:47:30 because start = 26 and end = 38
Considering trip from 2015-07-22 17:47:30 to 2015-07-22 17:58:34 because start = 38 and end = 46
Considering trip from 2015-07-22 17:58:34 to 2015-07-22 18:41:50 because start = 46 and end = 64
Considering trip from 2015-07-22 18:41:50 to 2015-07-22 23:14:20 because start = 64 and end = 84
Considering trip from 2015-07-22 23:14:20 to 2015-07-23 00:43:55 because start = 84 and end = 97
Considering trip from 2015-07-23 00:43:55 to 2015-07-23 00:48:05 because start = 97 and end = 105
Out[124]:

In [125]:
tom_dist_filter_map_list_22 = get_filter_compare(lq.get_uuid_list()[2], "distance", get_jul_dt(22), get_jul_dt(23))
ipy.inline_maps(tom_dist_filter_map_list_22, len(tom_dist_filter_map_list_22), 2)


final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1437552000000.0}}, {'data.mTime': {'$lt': 1437638400000.0}}], 'user_id': UUID('b0d937d0-70ef-305e-9563-440369012b39'), 'metadata.filter': 'distance'} 
original split indices are Int64Index([0, 2, 14, 23, 29, 48, 58, 60, 61], dtype='int64')
0    2015-07-22 08:05:53
2    2015-07-22 08:26:15
14   2015-07-22 08:40:35
23   2015-07-22 08:51:06
29   2015-07-22 17:47:30
48   2015-07-22 18:40:26
58   2015-07-22 23:14:15
60   2015-07-23 00:43:50
61   2015-07-23 00:43:55
Name: formatted_time, dtype: datetime64[ns]
Considering trip from 2015-07-22 08:05:53 to 2015-07-22 08:26:15 because start = 0 and end = 2
Ignoring trip from 2015-07-22 08:05:53 to 2015-07-22 08:26:15 because start = 0 and end = 2
Considering trip from 2015-07-22 08:26:15 to 2015-07-22 08:40:35 because start = 2 and end = 14
Considering trip from 2015-07-22 08:40:35 to 2015-07-22 08:51:06 because start = 14 and end = 23
Considering trip from 2015-07-22 08:51:06 to 2015-07-22 17:47:30 because start = 23 and end = 29
Considering trip from 2015-07-22 17:47:30 to 2015-07-22 18:40:26 because start = 29 and end = 48
Considering trip from 2015-07-22 18:40:26 to 2015-07-22 23:14:15 because start = 48 and end = 58
Considering trip from 2015-07-22 23:14:15 to 2015-07-23 00:43:50 because start = 58 and end = 60
Ignoring trip from 2015-07-22 23:14:15 to 2015-07-23 00:43:50 because start = 58 and end = 60
Considering trip from 2015-07-23 00:43:50 to 2015-07-23 00:43:55 because start = 60 and end = 61
Ignoring trip from 2015-07-23 00:43:50 to 2015-07-23 00:43:55 because start = 60 and end = 61
final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1437552000000.0}}, {'data.mTime': {'$lt': 1437638400000.0}}], 'user_id': UUID('b0d937d0-70ef-305e-9563-440369012b39'), 'metadata.filter': 'distance'} 
filtering points Int64Index([0, 2, 3, 4, 5, 6, 8, 12, 14, 15, 16, 17, 21, 22, 23, 24, 25, 27, 34, 36, 39, 44, 48, 49, 58, 60], dtype='int64')
filtered list size went from (62, 28) to (36, 28)
speedThreshold = 32
Found 1 potential outliers, list = [20]
Only one candidate, cluster centers are [array([20])]
Considering candidate cluster center [20]
lowRange = max(15, 0) = 15 and highRange = max(25, 36) = 25
Area size = 11, index = Int64Index([15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25], dtype='int64') with size 11
Deleted 2 points through ransac filtering
Retain mask is of size 11
Accuracy filtered df shape is (36, 28), ransac_mask size = 36
filtered split indices are Int64Index([0, 1, 6, 9, 11, 17, 26, 34, 35, 35], dtype='int64')
0    2015-07-22 08:05:53
1    2015-07-22 08:05:57
6    2015-07-22 08:26:39
9    2015-07-22 08:27:44
11   2015-07-22 08:28:43
17   2015-07-22 08:41:59
26   2015-07-22 08:53:56
34   2015-07-22 17:50:07
35   2015-07-22 17:53:19
35   2015-07-22 17:53:19
Name: formatted_time, dtype: datetime64[ns]
Considering trip from 2015-07-22 08:05:57 to 2015-07-22 08:26:54 because start = 0 and end = 1
Ignoring trip from 2015-07-22 08:05:57 to 2015-07-22 08:26:54 because start = 0 and end = 1
Considering trip from 2015-07-22 08:26:54 to 2015-07-22 08:42:08 because start = 1 and end = 6
Considering trip from 2015-07-22 08:42:08 to 2015-07-22 08:53:56 because start = 6 and end = 9
Ignoring trip from 2015-07-22 08:42:08 to 2015-07-22 08:53:56 because start = 6 and end = 9
Considering trip from 2015-07-22 08:53:56 to 2015-07-22 17:47:30 because start = 9 and end = 11
Ignoring trip from 2015-07-22 08:53:56 to 2015-07-22 17:47:30 because start = 9 and end = 11
Considering trip from 2015-07-22 17:47:30 to 2015-07-22 17:54:22 because start = 11 and end = 17
Considering trip from 2015-07-22 17:54:22 to 2015-07-22 18:41:50 because start = 17 and end = 26
Considering trip from 2015-07-22 18:41:50 to 2015-07-22 23:14:20 because start = 26 and end = 34
Considering trip from 2015-07-22 23:14:20 to 2015-07-23 00:43:55 because start = 34 and end = 35
Ignoring trip from 2015-07-22 23:14:20 to 2015-07-23 00:43:55 because start = 34 and end = 35
Considering trip from 2015-07-23 00:43:55 to 2015-07-23 00:43:55 because start = 35 and end = 35
Ignoring trip from 2015-07-23 00:43:55 to 2015-07-23 00:43:55 because start = 35 and end = 35
Out[125]:

In [126]:
import numpy as np

In [127]:
df[df.mLatitude == 37.262226300000002][["mAccuracy", "mTime", "mElapsedRealtimeNanos"]]


Out[127]:
mAccuracy mTime mElapsedRealtimeNanos

In [128]:
tom_dist_filter_map_list_23 = get_filter_compare(lq.get_uuid_list()[2], "distance", get_jul_dt(23), get_jul_dt(24))
ipy.inline_maps(tom_dist_filter_map_list_23, len(tom_dist_filter_map_list_23), 2)


final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1437638400000.0}}, {'data.mTime': {'$lt': 1437724800000.0}}], 'user_id': UUID('b0d937d0-70ef-305e-9563-440369012b39'), 'metadata.filter': 'distance'} 
original split indices are Int64Index([0, 2, 4, 64, 66, 77, 80, 110, 127, 238, 241, 245, 248], dtype='int64')
0     2015-07-23 01:14:32
2     2015-07-23 08:09:59
4     2015-07-23 08:16:09
64    2015-07-23 09:19:23
66    2015-07-23 09:28:08
77    2015-07-23 09:45:57
80    2015-07-23 09:56:17
110   2015-07-23 14:36:50
127   2015-07-23 15:04:13
238   2015-07-23 16:55:58
241   2015-07-23 17:15:31
245   2015-07-23 19:51:32
248   2015-07-23 19:59:22
Name: formatted_time, dtype: datetime64[ns]
Considering trip from 2015-07-23 01:14:32 to 2015-07-23 08:09:59 because start = 0 and end = 2
Ignoring trip from 2015-07-23 01:14:32 to 2015-07-23 08:09:59 because start = 0 and end = 2
Considering trip from 2015-07-23 08:09:59 to 2015-07-23 08:16:09 because start = 2 and end = 4
Ignoring trip from 2015-07-23 08:09:59 to 2015-07-23 08:16:09 because start = 2 and end = 4
Considering trip from 2015-07-23 08:16:09 to 2015-07-23 09:19:23 because start = 4 and end = 64
Considering trip from 2015-07-23 09:19:23 to 2015-07-23 09:28:08 because start = 64 and end = 66
Ignoring trip from 2015-07-23 09:19:23 to 2015-07-23 09:28:08 because start = 64 and end = 66
Considering trip from 2015-07-23 09:28:08 to 2015-07-23 09:45:57 because start = 66 and end = 77
Considering trip from 2015-07-23 09:45:57 to 2015-07-23 09:56:17 because start = 77 and end = 80
Ignoring trip from 2015-07-23 09:45:57 to 2015-07-23 09:56:17 because start = 77 and end = 80
Considering trip from 2015-07-23 09:56:17 to 2015-07-23 14:36:50 because start = 80 and end = 110
Considering trip from 2015-07-23 14:36:50 to 2015-07-23 15:04:13 because start = 110 and end = 127
Considering trip from 2015-07-23 15:04:13 to 2015-07-23 16:55:58 because start = 127 and end = 238
Considering trip from 2015-07-23 16:55:58 to 2015-07-23 17:15:31 because start = 238 and end = 241
Ignoring trip from 2015-07-23 16:55:58 to 2015-07-23 17:15:31 because start = 238 and end = 241
Considering trip from 2015-07-23 17:15:31 to 2015-07-23 19:51:32 because start = 241 and end = 245
Considering trip from 2015-07-23 19:51:32 to 2015-07-23 19:59:22 because start = 245 and end = 248
Ignoring trip from 2015-07-23 19:51:32 to 2015-07-23 19:59:22 because start = 245 and end = 248
final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1437638400000.0}}, {'data.mTime': {'$lt': 1437724800000.0}}], 'user_id': UUID('b0d937d0-70ef-305e-9563-440369012b39'), 'metadata.filter': 'distance'} 
filtering points Int64Index([0, 2, 61, 65, 66, 71, 77, 79, 80, 87, 88, 99, 120, 123, 125, 127, 129, 132, 133, 134, 135, 136, 152, 156, 158, 163, 165, 166, 168, 171, 173, 177, 179, 190, 192, 196, 208, 210, 213, 219, 224, 228, 230, 232], dtype='int64')
filtered list size went from (249, 28) to (205, 28)
speedThreshold = 143
Found 3 potential outliers, list = [68 69 70]
Only one candidate, cluster centers are [array([68, 69, 70])]
Considering candidate cluster center [68 69 70]
lowRange = max(63, 0) = 63 and highRange = max(73, 205) = 73
Area size = 11, index = Int64Index([63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73], dtype='int64') with size 11
Deleted 2 points through ransac filtering
Retain mask is of size 11
Accuracy filtered df shape is (205, 28), ransac_mask size = 205
filtered split indices are Int64Index([0, 1, 2, 61, 62, 71, 74, 98, 112, 115, 138, 194, 197, 201, 204], dtype='int64')
0     2015-07-23 01:14:32
1     2015-07-23 01:14:37
2     2015-07-23 08:09:59
61    2015-07-23 09:06:22
62    2015-07-23 09:06:57
71    2015-07-23 09:32:50
74    2015-07-23 09:35:34
98    2015-07-23 10:08:58
112   2015-07-23 14:38:45
115   2015-07-23 14:42:58
138   2015-07-23 15:15:26
194   2015-07-23 16:10:41
197   2015-07-23 16:12:54
201   2015-07-23 16:15:55
204   2015-07-23 16:18:00
Name: formatted_time, dtype: datetime64[ns]
Considering trip from 2015-07-23 01:14:37 to 2015-07-23 08:10:04 because start = 0 and end = 1
Ignoring trip from 2015-07-23 01:14:37 to 2015-07-23 08:10:04 because start = 0 and end = 1
Considering trip from 2015-07-23 08:10:04 to 2015-07-23 08:16:09 because start = 1 and end = 2
Ignoring trip from 2015-07-23 08:10:04 to 2015-07-23 08:16:09 because start = 1 and end = 2
Considering trip from 2015-07-23 08:16:09 to 2015-07-23 09:19:23 because start = 2 and end = 61
Considering trip from 2015-07-23 09:19:23 to 2015-07-23 09:29:14 because start = 61 and end = 62
Ignoring trip from 2015-07-23 09:19:23 to 2015-07-23 09:29:14 because start = 61 and end = 62
Considering trip from 2015-07-23 09:29:14 to 2015-07-23 09:46:32 because start = 62 and end = 71
Considering trip from 2015-07-23 09:46:32 to 2015-07-23 09:57:55 because start = 71 and end = 74
Ignoring trip from 2015-07-23 09:46:32 to 2015-07-23 09:57:55 because start = 71 and end = 74
Considering trip from 2015-07-23 09:57:55 to 2015-07-23 14:36:50 because start = 74 and end = 98
Considering trip from 2015-07-23 14:36:50 to 2015-07-23 15:05:21 because start = 98 and end = 112
Considering trip from 2015-07-23 15:05:21 to 2015-07-23 15:13:24 because start = 112 and end = 115
Ignoring trip from 2015-07-23 15:05:21 to 2015-07-23 15:13:24 because start = 112 and end = 115
Considering trip from 2015-07-23 15:13:24 to 2015-07-23 15:45:30 because start = 115 and end = 138
Considering trip from 2015-07-23 15:45:30 to 2015-07-23 16:55:58 because start = 138 and end = 194
Considering trip from 2015-07-23 16:55:58 to 2015-07-23 17:15:31 because start = 194 and end = 197
Ignoring trip from 2015-07-23 16:55:58 to 2015-07-23 17:15:31 because start = 194 and end = 197
Considering trip from 2015-07-23 17:15:31 to 2015-07-23 19:51:32 because start = 197 and end = 201
Considering trip from 2015-07-23 19:51:32 to 2015-07-23 19:59:22 because start = 201 and end = 204
Ignoring trip from 2015-07-23 19:51:32 to 2015-07-23 19:59:22 because start = 201 and end = 204
Out[128]:

In [129]:
my_time_filter_map_list_23 = get_filter_compare(lq.get_uuid_list()[0], "time", get_jul_dt(23), get_jul_dt(24))
ipy.inline_maps(my_time_filter_map_list_23, len(my_time_filter_map_list_23), 2)


final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1437638400000.0}}, {'data.mTime': {'$lt': 1437724800000.0}}], 'user_id': UUID('0763de67-f61e-3f5d-90e7-518e69793954'), 'metadata.filter': 'time'} 
original split indices are Int64Index([0, 135, 217, 458, 459, 469, 479, 503], dtype='int64')
0     2015-07-23 08:16:14
135   2015-07-23 09:51:10
217   2015-07-23 14:30:59
458   2015-07-23 16:46:47
459   2015-07-23 16:53:47
469   2015-07-23 19:40:05
479   2015-07-23 21:09:45
503   2015-07-23 21:21:06
Name: formatted_time, dtype: datetime64[ns]
Considering trip from 2015-07-23 08:16:14 to 2015-07-23 09:51:10 because start = 0 and end = 135
Considering trip from 2015-07-23 09:51:10 to 2015-07-23 14:30:59 because start = 135 and end = 217
Considering trip from 2015-07-23 14:30:59 to 2015-07-23 16:46:47 because start = 217 and end = 458
Considering trip from 2015-07-23 16:46:47 to 2015-07-23 16:53:47 because start = 458 and end = 459
Ignoring trip from 2015-07-23 16:46:47 to 2015-07-23 16:53:47 because start = 458 and end = 459
Considering trip from 2015-07-23 16:53:47 to 2015-07-23 19:40:05 because start = 459 and end = 469
Considering trip from 2015-07-23 19:40:05 to 2015-07-23 21:09:45 because start = 469 and end = 479
Considering trip from 2015-07-23 21:09:45 to 2015-07-23 21:21:06 because start = 479 and end = 503
final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1437638400000.0}}, {'data.mTime': {'$lt': 1437724800000.0}}], 'user_id': UUID('0763de67-f61e-3f5d-90e7-518e69793954'), 'metadata.filter': 'time'} 
filtering points Int64Index([41, 42, 43, 45, 46, 47, 48, 49, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 67, 68, 70, 76, 81, 82, 83, 84, 91, 96, 101, 102, 105, 106, 109, 110, 111, 113, 114, 117, 118, 119, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 160, 161, 162, 165, 166, 167, 168, 169, 170, 171, 172, 173, 194, 217, 232, 233, 249, 250, 251, 252, 253, 254, 255, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, ...], dtype='int64')
filtered list size went from (504, 28) to (324, 28)
speedThreshold = 32
Found 4 potential outliers, list = [ 67  68  70 269]
Only one candidate, cluster centers are [array([ 67,  68,  70, 269])]
Considering candidate cluster center [ 67  68  70 269]
lowRange = max(62, 0) = 62 and highRange = max(72, 324) = 72
Area size = 11, index = Int64Index([62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72], dtype='int64') with size 11
Deleted 0 points through ransac filtering
Retain mask is of size 11
Accuracy filtered df shape is (324, 28), ransac_mask size = 324
filtered split indices are Int64Index([0, 44, 89, 93, 96, 138, 169, 174, 224, 271, 280, 281, 291, 300, 323], dtype='int64')
0     2015-07-23 08:16:14
44    2015-07-23 08:26:17
89    2015-07-23 08:51:14
93    2015-07-23 08:53:15
96    2015-07-23 08:54:44
138   2015-07-23 09:52:41
169   2015-07-23 10:10:46
174   2015-07-23 10:13:20
224   2015-07-23 14:34:06
271   2015-07-23 14:59:44
280   2015-07-23 15:01:57
281   2015-07-23 15:02:00
291   2015-07-23 15:03:33
300   2015-07-23 15:09:03
323   2015-07-23 15:20:30
Name: formatted_time, dtype: datetime64[ns]
Considering trip from 2015-07-23 08:16:14 to 2015-07-23 08:35:14 because start = 0 and end = 44
Considering trip from 2015-07-23 08:35:14 to 2015-07-23 09:57:43 because start = 44 and end = 89
Considering trip from 2015-07-23 09:57:43 to 2015-07-23 10:05:46 because start = 89 and end = 93
Considering trip from 2015-07-23 10:05:46 to 2015-07-23 10:13:20 because start = 93 and end = 96
Ignoring trip from 2015-07-23 10:05:46 to 2015-07-23 10:13:20 because start = 93 and end = 96
Considering trip from 2015-07-23 10:13:20 to 2015-07-23 14:31:07 because start = 96 and end = 138
Considering trip from 2015-07-23 14:31:07 to 2015-07-23 15:04:29 because start = 138 and end = 169
Considering trip from 2015-07-23 15:04:29 to 2015-07-23 15:13:29 because start = 169 and end = 174
Considering trip from 2015-07-23 15:13:29 to 2015-07-23 15:51:42 because start = 174 and end = 224
Considering trip from 2015-07-23 15:51:42 to 2015-07-23 16:29:15 because start = 224 and end = 271
Considering trip from 2015-07-23 16:29:15 to 2015-07-23 16:46:47 because start = 271 and end = 280
Considering trip from 2015-07-23 16:46:47 to 2015-07-23 16:53:47 because start = 280 and end = 281
Ignoring trip from 2015-07-23 16:46:47 to 2015-07-23 16:53:47 because start = 280 and end = 281
Considering trip from 2015-07-23 16:53:47 to 2015-07-23 19:40:08 because start = 281 and end = 291
Considering trip from 2015-07-23 19:40:08 to 2015-07-23 21:09:45 because start = 291 and end = 300
Considering trip from 2015-07-23 21:09:45 to 2015-07-23 21:21:06 because start = 300 and end = 323
Out[129]:

In [130]:
my_time_filter_map_list_24 = get_filter_compare(lq.get_uuid_list()[0], "time", get_jul_dt(24), get_jul_dt(25))
ipy.inline_maps(my_time_filter_map_list_24, len(my_time_filter_map_list_24), 2)


final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1437724800000.0}}, {'data.mTime': {'$lt': 1437811200000.0}}], 'user_id': UUID('0763de67-f61e-3f5d-90e7-518e69793954'), 'metadata.filter': 'time'} 
original split indices are Int64Index([0, 64, 125, 136, 224, 270, 296, 297, 306, 334], dtype='int64')
0     2015-07-24 07:50:40
64    2015-07-24 08:37:08
125   2015-07-24 15:54:38
136   2015-07-24 16:42:29
224   2015-07-24 17:21:31
270   2015-07-24 17:54:48
296   2015-07-24 18:29:27
297   2015-07-24 18:50:18
306   2015-07-24 19:12:24
334   2015-07-24 19:26:41
Name: formatted_time, dtype: datetime64[ns]
Considering trip from 2015-07-24 07:50:40 to 2015-07-24 08:37:08 because start = 0 and end = 64
Considering trip from 2015-07-24 08:37:08 to 2015-07-24 15:54:38 because start = 64 and end = 125
Considering trip from 2015-07-24 15:54:38 to 2015-07-24 16:42:29 because start = 125 and end = 136
Considering trip from 2015-07-24 16:42:29 to 2015-07-24 17:21:31 because start = 136 and end = 224
Considering trip from 2015-07-24 17:21:31 to 2015-07-24 17:54:48 because start = 224 and end = 270
Considering trip from 2015-07-24 17:54:48 to 2015-07-24 18:29:27 because start = 270 and end = 296
Considering trip from 2015-07-24 18:29:27 to 2015-07-24 18:50:18 because start = 296 and end = 297
Ignoring trip from 2015-07-24 18:29:27 to 2015-07-24 18:50:18 because start = 296 and end = 297
Considering trip from 2015-07-24 18:50:18 to 2015-07-24 19:12:24 because start = 297 and end = 306
Considering trip from 2015-07-24 19:12:24 to 2015-07-24 19:26:41 because start = 306 and end = 334
final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1437724800000.0}}, {'data.mTime': {'$lt': 1437811200000.0}}], 'user_id': UUID('0763de67-f61e-3f5d-90e7-518e69793954'), 'metadata.filter': 'time'} 
filtering points Int64Index([24, 26, 30, 45, 64, 79, 80, 125, 147, 149, 150, 166, 189, 200, 202, 214, 215, 219, 224, 226, 228, 238, 276, 296, 298], dtype='int64')
filtered list size went from (335, 28) to (310, 28)
speedThreshold = 35
Found 4 potential outliers, list = [211 213 215 216]
Only one candidate, cluster centers are [array([211, 213, 215, 216])]
Considering candidate cluster center [211 213 215 216]
lowRange = max(206, 0) = 206 and highRange = max(216, 310) = 216
Area size = 11, index = Int64Index([206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216], dtype='int64') with size 11
Deleted 0 points through ransac filtering
Retain mask is of size 11
Accuracy filtered df shape is (310, 28), ransac_mask size = 310
filtered split indices are Int64Index([0, 60, 118, 128, 206, 248, 273, 281, 309], dtype='int64')
0     2015-07-24 07:50:40
60    2015-07-24 08:21:57
118   2015-07-24 09:03:43
128   2015-07-24 15:55:40
206   2015-07-24 17:08:57
248   2015-07-24 17:37:43
273   2015-07-24 17:55:39
281   2015-07-24 17:59:52
309   2015-07-24 19:14:08
Name: formatted_time, dtype: datetime64[ns]
Considering trip from 2015-07-24 07:50:40 to 2015-07-24 08:37:11 because start = 0 and end = 60
Considering trip from 2015-07-24 08:37:11 to 2015-07-24 15:54:40 because start = 60 and end = 118
Considering trip from 2015-07-24 15:54:40 to 2015-07-24 16:42:29 because start = 118 and end = 128
Considering trip from 2015-07-24 16:42:29 to 2015-07-24 17:21:34 because start = 128 and end = 206
Considering trip from 2015-07-24 17:21:34 to 2015-07-24 17:54:48 because start = 206 and end = 248
Considering trip from 2015-07-24 17:54:48 to 2015-07-24 18:50:18 because start = 248 and end = 273
Considering trip from 2015-07-24 18:50:18 to 2015-07-24 19:12:24 because start = 273 and end = 281
Considering trip from 2015-07-24 19:12:24 to 2015-07-24 19:26:41 because start = 281 and end = 309
Out[130]:

In [131]:
tom_dist_filter_map_list_24 = get_filter_compare(lq.get_uuid_list()[2], "distance", get_jul_dt(24), get_jul_dt(25))
ipy.inline_maps(tom_dist_filter_map_list_24, len(tom_dist_filter_map_list_24), 2)


final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1437724800000.0}}, {'data.mTime': {'$lt': 1437811200000.0}}], 'user_id': UUID('b0d937d0-70ef-305e-9563-440369012b39'), 'metadata.filter': 'distance'} 
original split indices are Int64Index([0, 22, 52, 60, 61, 88], dtype='int64')
0    2015-07-24 08:15:56
22   2015-07-24 14:08:08
52   2015-07-24 14:47:13
60   2015-07-24 15:04:56
61   2015-07-24 15:14:14
88   2015-07-24 15:52:32
Name: formatted_time, dtype: datetime64[ns]
Considering trip from 2015-07-24 08:15:56 to 2015-07-24 14:08:08 because start = 0 and end = 22
Considering trip from 2015-07-24 14:08:08 to 2015-07-24 14:47:13 because start = 22 and end = 52
Considering trip from 2015-07-24 14:47:13 to 2015-07-24 15:04:56 because start = 52 and end = 60
Considering trip from 2015-07-24 15:04:56 to 2015-07-24 15:14:14 because start = 60 and end = 61
Ignoring trip from 2015-07-24 15:04:56 to 2015-07-24 15:14:14 because start = 60 and end = 61
Considering trip from 2015-07-24 15:14:14 to 2015-07-24 15:52:32 because start = 61 and end = 88
final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1437724800000.0}}, {'data.mTime': {'$lt': 1437811200000.0}}], 'user_id': UUID('b0d937d0-70ef-305e-9563-440369012b39'), 'metadata.filter': 'distance'} 
filtering points Int64Index([24, 30, 76, 77], dtype='int64')
filtered list size went from (89, 28) to (85, 28)
speedThreshold = 20
Found 1 potential outliers, list = [70]
Only one candidate, cluster centers are [array([70])]
Considering candidate cluster center [70]
lowRange = max(65, 0) = 65 and highRange = max(75, 85) = 75
Area size = 11, index = Int64Index([65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75], dtype='int64') with size 11
Deleted 0 points through ransac filtering
Retain mask is of size 11
Accuracy filtered df shape is (85, 28), ransac_mask size = 85
filtered split indices are Int64Index([0, 22, 50, 58, 59, 84], dtype='int64')
0    2015-07-24 08:15:56
22   2015-07-24 14:08:08
50   2015-07-24 14:31:50
58   2015-07-24 14:51:26
59   2015-07-24 14:51:57
84   2015-07-24 15:45:39
Name: formatted_time, dtype: datetime64[ns]
Considering trip from 2015-07-24 08:15:56 to 2015-07-24 14:08:08 because start = 0 and end = 22
Considering trip from 2015-07-24 14:08:08 to 2015-07-24 14:47:13 because start = 22 and end = 50
Considering trip from 2015-07-24 14:47:13 to 2015-07-24 15:04:56 because start = 50 and end = 58
Considering trip from 2015-07-24 15:04:56 to 2015-07-24 15:14:14 because start = 58 and end = 59
Ignoring trip from 2015-07-24 15:04:56 to 2015-07-24 15:14:14 because start = 58 and end = 59
Considering trip from 2015-07-24 15:14:14 to 2015-07-24 15:52:32 because start = 59 and end = 84
Out[131]:

In [132]:
tom_dist_filter_map_list_25 = get_filter_compare(lq.get_uuid_list()[2], "distance", get_jul_dt(25), get_jul_dt(26))
ipy.inline_maps(tom_dist_filter_map_list_25, len(tom_dist_filter_map_list_25), 2)


final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1437811200000.0}}, {'data.mTime': {'$lt': 1437897600000.0}}], 'user_id': UUID('b0d937d0-70ef-305e-9563-440369012b39'), 'metadata.filter': 'distance'} 
original split indices are Int64Index([0, 5, 12, 20, 30], dtype='int64')
0    2015-07-25 10:13:31
5    2015-07-25 10:55:29
12   2015-07-25 15:25:13
20   2015-07-25 16:28:01
30   2015-07-25 16:38:23
Name: formatted_time, dtype: datetime64[ns]
Considering trip from 2015-07-25 10:13:31 to 2015-07-25 10:55:29 because start = 0 and end = 5
Considering trip from 2015-07-25 10:55:29 to 2015-07-25 15:25:13 because start = 5 and end = 12
Considering trip from 2015-07-25 15:25:13 to 2015-07-25 16:28:01 because start = 12 and end = 20
Considering trip from 2015-07-25 16:28:01 to 2015-07-25 16:38:23 because start = 20 and end = 30
final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1437811200000.0}}, {'data.mTime': {'$lt': 1437897600000.0}}], 'user_id': UUID('b0d937d0-70ef-305e-9563-440369012b39'), 'metadata.filter': 'distance'} 
filtering points Int64Index([], dtype='int64')
filtered list size went from (31, 28) to (31, 28)
speedThreshold = 13
Found 1 potential outliers, list = [18]
Only one candidate, cluster centers are [array([18])]
Considering candidate cluster center [18]
lowRange = max(13, 0) = 13 and highRange = max(23, 31) = 23
Area size = 11, index = Int64Index([13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], dtype='int64') with size 11
Deleted 4 points through ransac filtering
Retain mask is of size 11
Accuracy filtered df shape is (31, 28), ransac_mask size = 31
filtered split indices are Int64Index([0, 5, 12, 22, 30], dtype='int64')
0    2015-07-25 10:13:31
5    2015-07-25 10:55:29
12   2015-07-25 15:25:13
22   2015-07-25 16:29:35
30   2015-07-25 16:38:23
Name: formatted_time, dtype: datetime64[ns]
Considering trip from 2015-07-25 10:13:31 to 2015-07-25 10:55:29 because start = 0 and end = 5
Considering trip from 2015-07-25 10:55:29 to 2015-07-25 15:25:13 because start = 5 and end = 12
Considering trip from 2015-07-25 15:25:13 to 2015-07-25 16:29:35 because start = 12 and end = 22
Considering trip from 2015-07-25 16:29:35 to 2015-07-25 16:38:23 because start = 22 and end = 30
Out[132]:

In [133]:
tom_dist_filter_map_list_27 = get_filter_compare(lq.get_uuid_list()[2], "distance", get_jul_dt(27), get_jul_dt(28))
ipy.inline_maps(tom_dist_filter_map_list_27, len(tom_dist_filter_map_list_27), 2)


final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1437984000000.0}}, {'data.mTime': {'$lt': 1438070400000.0}}], 'user_id': UUID('b0d937d0-70ef-305e-9563-440369012b39'), 'metadata.filter': 'distance'} 
original split indices are Int64Index([0, 2, 4, 6, 18, 19, 20, 32, 33, 90, 95, 111, 123, 126, 152, 163, 165, 167, 168, 182, 186, 186], dtype='int64')
0     2015-07-27 07:40:04
2     2015-07-27 08:04:03
4     2015-07-27 08:36:17
6     2015-07-27 08:50:00
18    2015-07-27 09:17:09
19    2015-07-27 09:22:39
20    2015-07-27 09:29:03
32    2015-07-27 09:53:51
33    2015-07-27 10:05:25
90    2015-07-27 11:11:30
95    2015-07-27 11:22:29
111   2015-07-27 11:48:48
123   2015-07-27 12:12:02
126   2015-07-27 12:22:46
152   2015-07-27 12:56:06
163   2015-07-27 13:19:03
165   2015-07-27 13:27:23
167   2015-07-27 13:35:27
168   2015-07-27 13:51:06
182   2015-07-27 17:27:47
186   2015-07-27 18:45:54
186   2015-07-27 18:45:54
Name: formatted_time, dtype: datetime64[ns]
Considering trip from 2015-07-27 07:40:04 to 2015-07-27 08:04:03 because start = 0 and end = 2
Ignoring trip from 2015-07-27 07:40:04 to 2015-07-27 08:04:03 because start = 0 and end = 2
Considering trip from 2015-07-27 08:04:03 to 2015-07-27 08:36:17 because start = 2 and end = 4
Ignoring trip from 2015-07-27 08:04:03 to 2015-07-27 08:36:17 because start = 2 and end = 4
Considering trip from 2015-07-27 08:36:17 to 2015-07-27 08:50:00 because start = 4 and end = 6
Ignoring trip from 2015-07-27 08:36:17 to 2015-07-27 08:50:00 because start = 4 and end = 6
Considering trip from 2015-07-27 08:50:00 to 2015-07-27 09:17:09 because start = 6 and end = 18
Considering trip from 2015-07-27 09:17:09 to 2015-07-27 09:22:39 because start = 18 and end = 19
Ignoring trip from 2015-07-27 09:17:09 to 2015-07-27 09:22:39 because start = 18 and end = 19
Considering trip from 2015-07-27 09:22:39 to 2015-07-27 09:29:03 because start = 19 and end = 20
Ignoring trip from 2015-07-27 09:22:39 to 2015-07-27 09:29:03 because start = 19 and end = 20
Considering trip from 2015-07-27 09:29:03 to 2015-07-27 09:53:51 because start = 20 and end = 32
Considering trip from 2015-07-27 09:53:51 to 2015-07-27 10:05:25 because start = 32 and end = 33
Ignoring trip from 2015-07-27 09:53:51 to 2015-07-27 10:05:25 because start = 32 and end = 33
Considering trip from 2015-07-27 10:05:25 to 2015-07-27 11:11:30 because start = 33 and end = 90
Considering trip from 2015-07-27 11:11:30 to 2015-07-27 11:22:29 because start = 90 and end = 95
Considering trip from 2015-07-27 11:22:29 to 2015-07-27 11:48:48 because start = 95 and end = 111
Considering trip from 2015-07-27 11:48:48 to 2015-07-27 12:12:02 because start = 111 and end = 123
Considering trip from 2015-07-27 12:12:02 to 2015-07-27 12:22:46 because start = 123 and end = 126
Ignoring trip from 2015-07-27 12:12:02 to 2015-07-27 12:22:46 because start = 123 and end = 126
Considering trip from 2015-07-27 12:22:46 to 2015-07-27 12:56:06 because start = 126 and end = 152
Considering trip from 2015-07-27 12:56:06 to 2015-07-27 13:19:03 because start = 152 and end = 163
Considering trip from 2015-07-27 13:19:03 to 2015-07-27 13:27:23 because start = 163 and end = 165
Ignoring trip from 2015-07-27 13:19:03 to 2015-07-27 13:27:23 because start = 163 and end = 165
Considering trip from 2015-07-27 13:27:23 to 2015-07-27 13:35:27 because start = 165 and end = 167
Ignoring trip from 2015-07-27 13:27:23 to 2015-07-27 13:35:27 because start = 165 and end = 167
Considering trip from 2015-07-27 13:35:27 to 2015-07-27 13:51:06 because start = 167 and end = 168
Ignoring trip from 2015-07-27 13:35:27 to 2015-07-27 13:51:06 because start = 167 and end = 168
Considering trip from 2015-07-27 13:51:06 to 2015-07-27 17:27:47 because start = 168 and end = 182
Considering trip from 2015-07-27 17:27:47 to 2015-07-27 18:45:54 because start = 182 and end = 186
Considering trip from 2015-07-27 18:45:54 to 2015-07-27 18:45:54 because start = 186 and end = 186
Ignoring trip from 2015-07-27 18:45:54 to 2015-07-27 18:45:54 because start = 186 and end = 186
final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1437984000000.0}}, {'data.mTime': {'$lt': 1438070400000.0}}], 'user_id': UUID('b0d937d0-70ef-305e-9563-440369012b39'), 'metadata.filter': 'distance'} 
filtering points Int64Index([0, 1, 3, 5, 6, 7, 8, 9, 11, 13, 14, 15, 17, 18, 19, 20, 22, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 64, 66, 67, 71, 73, 75, 77, 79, 81, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, ...], dtype='int64')
filtered list size went from (187, 28) to (36, 28)
speedThreshold = 81
Found 1 potential outliers, list = [22]
Only one candidate, cluster centers are [array([22])]
Considering candidate cluster center [22]
lowRange = max(17, 0) = 17 and highRange = max(27, 36) = 27
Area size = 11, index = Int64Index([17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27], dtype='int64') with size 11
Deleted 5 points through ransac filtering
Retain mask is of size 11
Accuracy filtered df shape is (36, 28), ransac_mask size = 36
filtered split indices are Int64Index([0, 1, 2, 4, 5, 7, 19, 23, 29, 30, 32, 34, 35], dtype='int64')
0    2015-07-27 07:40:04
1    2015-07-27 07:40:16
2    2015-07-27 08:04:03
4    2015-07-27 08:36:17
5    2015-07-27 08:36:48
7    2015-07-27 08:53:09
19   2015-07-27 09:22:39
23   2015-07-27 09:34:18
29   2015-07-27 09:43:48
30   2015-07-27 09:44:20
32   2015-07-27 09:53:51
34   2015-07-27 10:05:52
35   2015-07-27 10:06:25
Name: formatted_time, dtype: datetime64[ns]
Considering trip from 2015-07-27 08:04:03 to 2015-07-27 08:36:17 because start = 0 and end = 1
Ignoring trip from 2015-07-27 08:04:03 to 2015-07-27 08:36:17 because start = 0 and end = 1
Considering trip from 2015-07-27 08:36:17 to 2015-07-27 08:57:04 because start = 1 and end = 2
Ignoring trip from 2015-07-27 08:36:17 to 2015-07-27 08:57:04 because start = 1 and end = 2
Considering trip from 2015-07-27 08:57:04 to 2015-07-27 09:06:16 because start = 2 and end = 4
Ignoring trip from 2015-07-27 08:57:04 to 2015-07-27 09:06:16 because start = 2 and end = 4
Considering trip from 2015-07-27 09:06:16 to 2015-07-27 09:29:50 because start = 4 and end = 5
Ignoring trip from 2015-07-27 09:06:16 to 2015-07-27 09:29:50 because start = 4 and end = 5
Considering trip from 2015-07-27 09:29:50 to 2015-07-27 10:41:41 because start = 5 and end = 7
Ignoring trip from 2015-07-27 09:29:50 to 2015-07-27 10:41:41 because start = 5 and end = 7
Considering trip from 2015-07-27 10:41:41 to 2015-07-27 12:24:22 because start = 7 and end = 19
Considering trip from 2015-07-27 12:24:22 to 2015-07-27 12:34:45 because start = 19 and end = 23
Considering trip from 2015-07-27 12:34:45 to 2015-07-27 13:27:23 because start = 23 and end = 29
Considering trip from 2015-07-27 13:27:23 to 2015-07-27 13:51:06 because start = 29 and end = 30
Ignoring trip from 2015-07-27 13:27:23 to 2015-07-27 13:51:06 because start = 29 and end = 30
Considering trip from 2015-07-27 13:51:06 to 2015-07-27 13:59:48 because start = 30 and end = 32
Ignoring trip from 2015-07-27 13:51:06 to 2015-07-27 13:59:48 because start = 30 and end = 32
Considering trip from 2015-07-27 13:59:48 to 2015-07-27 17:27:47 because start = 32 and end = 34
Ignoring trip from 2015-07-27 13:59:48 to 2015-07-27 17:27:47 because start = 32 and end = 34
Considering trip from 2015-07-27 17:27:47 to 2015-07-27 17:28:54 because start = 34 and end = 35
Ignoring trip from 2015-07-27 17:27:47 to 2015-07-27 17:28:54 because start = 34 and end = 35
Out[133]:

In [134]:
tom_dist_filter_map_list_28 = get_filter_compare(lq.get_uuid_list()[2], "distance", get_jul_dt(28), get_jul_dt(29))
ipy.inline_maps(tom_dist_filter_map_list_28, len(tom_dist_filter_map_list_28), 2)


final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1438070400000.0}}, {'data.mTime': {'$lt': 1438156800000.0}}], 'user_id': UUID('b0d937d0-70ef-305e-9563-440369012b39'), 'metadata.filter': 'distance'} 
original split indices are Int64Index([0, 1, 2, 3, 4, 5, 6, 12, 19, 21, 27, 28, 29, 33, 54, 85, 111, 220], dtype='int64')
0     2015-07-28 06:49:08
1     2015-07-28 06:58:59
2     2015-07-28 07:30:29
3     2015-07-28 07:37:22
4     2015-07-28 07:48:52
5     2015-07-28 08:34:16
6     2015-07-28 08:50:38
12    2015-07-28 09:29:12
19    2015-07-28 09:42:46
21    2015-07-28 09:50:03
27    2015-07-28 09:58:50
28    2015-07-28 10:11:17
29    2015-07-28 10:18:02
33    2015-07-28 10:27:46
54    2015-07-28 13:46:52
85    2015-07-28 14:29:16
111   2015-07-28 15:08:12
220   2015-07-28 16:32:09
Name: formatted_time, dtype: datetime64[ns]
Considering trip from 2015-07-28 06:49:08 to 2015-07-28 06:58:59 because start = 0 and end = 1
Ignoring trip from 2015-07-28 06:49:08 to 2015-07-28 06:58:59 because start = 0 and end = 1
Considering trip from 2015-07-28 06:58:59 to 2015-07-28 07:30:29 because start = 1 and end = 2
Ignoring trip from 2015-07-28 06:58:59 to 2015-07-28 07:30:29 because start = 1 and end = 2
Considering trip from 2015-07-28 07:30:29 to 2015-07-28 07:37:22 because start = 2 and end = 3
Ignoring trip from 2015-07-28 07:30:29 to 2015-07-28 07:37:22 because start = 2 and end = 3
Considering trip from 2015-07-28 07:37:22 to 2015-07-28 07:48:52 because start = 3 and end = 4
Ignoring trip from 2015-07-28 07:37:22 to 2015-07-28 07:48:52 because start = 3 and end = 4
Considering trip from 2015-07-28 07:48:52 to 2015-07-28 08:34:16 because start = 4 and end = 5
Ignoring trip from 2015-07-28 07:48:52 to 2015-07-28 08:34:16 because start = 4 and end = 5
Considering trip from 2015-07-28 08:34:16 to 2015-07-28 08:50:38 because start = 5 and end = 6
Ignoring trip from 2015-07-28 08:34:16 to 2015-07-28 08:50:38 because start = 5 and end = 6
Considering trip from 2015-07-28 08:50:38 to 2015-07-28 09:29:12 because start = 6 and end = 12
Considering trip from 2015-07-28 09:29:12 to 2015-07-28 09:42:46 because start = 12 and end = 19
Considering trip from 2015-07-28 09:42:46 to 2015-07-28 09:50:03 because start = 19 and end = 21
Ignoring trip from 2015-07-28 09:42:46 to 2015-07-28 09:50:03 because start = 19 and end = 21
Considering trip from 2015-07-28 09:50:03 to 2015-07-28 09:58:50 because start = 21 and end = 27
Considering trip from 2015-07-28 09:58:50 to 2015-07-28 10:11:17 because start = 27 and end = 28
Ignoring trip from 2015-07-28 09:58:50 to 2015-07-28 10:11:17 because start = 27 and end = 28
Considering trip from 2015-07-28 10:11:17 to 2015-07-28 10:18:02 because start = 28 and end = 29
Ignoring trip from 2015-07-28 10:11:17 to 2015-07-28 10:18:02 because start = 28 and end = 29
Considering trip from 2015-07-28 10:18:02 to 2015-07-28 10:27:46 because start = 29 and end = 33
Considering trip from 2015-07-28 10:27:46 to 2015-07-28 13:46:52 because start = 33 and end = 54
Considering trip from 2015-07-28 13:46:52 to 2015-07-28 14:29:16 because start = 54 and end = 85
Considering trip from 2015-07-28 14:29:16 to 2015-07-28 15:08:12 because start = 85 and end = 111
Considering trip from 2015-07-28 15:08:12 to 2015-07-28 16:32:09 because start = 111 and end = 220
final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1438070400000.0}}, {'data.mTime': {'$lt': 1438156800000.0}}], 'user_id': UUID('b0d937d0-70ef-305e-9563-440369012b39'), 'metadata.filter': 'distance'} 
filtering points Int64Index([5, 6, 7, 9, 11, 13, 16, 18, 20, 22, 24, 26, 27, 28, 30, 32, 34, 36, 38, 40, 41, 42, 43, 45, 49, 51, 53, 56, 58, 60, 65, 67, 68, 69, 70, 71, 109, 123, 129, 130, 140, 141, 146, 160, 164, 171, 199, 210], dtype='int64')
filtered list size went from (221, 28) to (173, 28)
speedThreshold = 1275
Found 2 potential outliers, list = [1 2]
Only one candidate, cluster centers are [array([1, 2])]
Considering candidate cluster center [1 2]
lowRange = max(-4, 0) = 0 and highRange = max(6, 173) = 6
Area size = 7, index = Int64Index([0, 1, 2, 3, 4, 5, 6], dtype='int64') with size 7
Deleted 1 points through ransac filtering
Retain mask is of size 7
Accuracy filtered df shape is (173, 28), ransac_mask size = 173
filtered split indices are Int64Index([0, 2, 3, 4, 5, 7, 11, 12, 15, 17, 21, 27, 36, 49, 74, 172], dtype='int64')
0     2015-07-28 06:49:08
2     2015-07-28 07:30:29
3     2015-07-28 07:37:22
4     2015-07-28 07:48:52
5     2015-07-28 08:34:16
7     2015-07-28 08:51:12
11    2015-07-28 08:57:23
12    2015-07-28 09:29:12
15    2015-07-28 09:33:20
17    2015-07-28 09:34:55
21    2015-07-28 09:50:03
27    2015-07-28 09:58:50
36    2015-07-28 10:30:20
49    2015-07-28 10:43:13
74    2015-07-28 14:10:06
172   2015-07-28 15:52:59
Name: formatted_time, dtype: datetime64[ns]
Considering trip from 2015-07-28 06:49:08 to 2015-07-28 07:30:29 because start = 0 and end = 2
Ignoring trip from 2015-07-28 06:49:08 to 2015-07-28 07:30:29 because start = 0 and end = 2
Considering trip from 2015-07-28 07:30:29 to 2015-07-28 07:37:22 because start = 2 and end = 3
Ignoring trip from 2015-07-28 07:30:29 to 2015-07-28 07:37:22 because start = 2 and end = 3
Considering trip from 2015-07-28 07:37:22 to 2015-07-28 07:48:52 because start = 3 and end = 4
Ignoring trip from 2015-07-28 07:37:22 to 2015-07-28 07:48:52 because start = 3 and end = 4
Considering trip from 2015-07-28 07:48:52 to 2015-07-28 08:52:44 because start = 4 and end = 5
Ignoring trip from 2015-07-28 07:48:52 to 2015-07-28 08:52:44 because start = 4 and end = 5
Considering trip from 2015-07-28 08:52:44 to 2015-07-28 09:29:12 because start = 5 and end = 7
Ignoring trip from 2015-07-28 08:52:44 to 2015-07-28 09:29:12 because start = 5 and end = 7
Considering trip from 2015-07-28 09:29:12 to 2015-07-28 09:42:46 because start = 7 and end = 11
Considering trip from 2015-07-28 09:42:46 to 2015-07-28 09:50:03 because start = 11 and end = 12
Ignoring trip from 2015-07-28 09:42:46 to 2015-07-28 09:50:03 because start = 11 and end = 12
Considering trip from 2015-07-28 09:50:03 to 2015-07-28 10:18:02 because start = 12 and end = 15
Ignoring trip from 2015-07-28 09:50:03 to 2015-07-28 10:18:02 because start = 12 and end = 15
Considering trip from 2015-07-28 10:18:02 to 2015-07-28 10:27:46 because start = 15 and end = 17
Ignoring trip from 2015-07-28 10:18:02 to 2015-07-28 10:27:46 because start = 15 and end = 17
Considering trip from 2015-07-28 10:27:46 to 2015-07-28 10:38:09 because start = 17 and end = 21
Considering trip from 2015-07-28 10:38:09 to 2015-07-28 13:46:52 because start = 21 and end = 27
Considering trip from 2015-07-28 13:46:52 to 2015-07-28 14:07:13 because start = 27 and end = 36
Considering trip from 2015-07-28 14:07:13 to 2015-07-28 14:29:16 because start = 36 and end = 49
Considering trip from 2015-07-28 14:29:16 to 2015-07-28 15:08:12 because start = 49 and end = 74
Considering trip from 2015-07-28 15:08:12 to 2015-07-28 16:32:09 because start = 74 and end = 172
Out[134]:

In [135]:
tom_dist_filter_map_list_29 = get_filter_compare(lq.get_uuid_list()[2], "distance", get_jul_dt(29), get_jul_dt(30))
ipy.inline_maps(tom_dist_filter_map_list_29, len(tom_dist_filter_map_list_29), 2)


final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1438156800000.0}}, {'data.mTime': {'$lt': 1438243200000.0}}], 'user_id': UUID('b0d937d0-70ef-305e-9563-440369012b39'), 'metadata.filter': 'distance'} 
original split indices are Int64Index([0, 52, 79, 93, 94, 98, 133, 137], dtype='int64')
0     2015-07-29 06:31:34
52    2015-07-29 07:18:32
79    2015-07-29 17:37:42
93    2015-07-29 18:01:39
94    2015-07-29 18:07:35
98    2015-07-29 18:21:45
133   2015-07-29 19:00:25
137   2015-07-29 19:05:06
Name: formatted_time, dtype: datetime64[ns]
Considering trip from 2015-07-29 06:31:34 to 2015-07-29 07:18:32 because start = 0 and end = 52
Considering trip from 2015-07-29 07:18:32 to 2015-07-29 17:37:42 because start = 52 and end = 79
Considering trip from 2015-07-29 17:37:42 to 2015-07-29 18:01:39 because start = 79 and end = 93
Considering trip from 2015-07-29 18:01:39 to 2015-07-29 18:07:35 because start = 93 and end = 94
Ignoring trip from 2015-07-29 18:01:39 to 2015-07-29 18:07:35 because start = 93 and end = 94
Considering trip from 2015-07-29 18:07:35 to 2015-07-29 18:21:45 because start = 94 and end = 98
Considering trip from 2015-07-29 18:21:45 to 2015-07-29 19:00:25 because start = 98 and end = 133
Considering trip from 2015-07-29 19:00:25 to 2015-07-29 19:05:06 because start = 133 and end = 137
final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1438156800000.0}}, {'data.mTime': {'$lt': 1438243200000.0}}], 'user_id': UUID('b0d937d0-70ef-305e-9563-440369012b39'), 'metadata.filter': 'distance'} 
filtering points Int64Index([93, 95, 105, 116, 122, 124, 126, 131], dtype='int64')
filtered list size went from (138, 28) to (130, 28)
speedThreshold = 34
Found 2 potential outliers, list = [113 114]
Only one candidate, cluster centers are [array([113, 114])]
Considering candidate cluster center [113 114]
lowRange = max(108, 0) = 108 and highRange = max(118, 130) = 118
Area size = 11, index = Int64Index([108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118], dtype='int64') with size 11
Deleted 0 points through ransac filtering
Retain mask is of size 11
Accuracy filtered df shape is (130, 28), ransac_mask size = 130
filtered split indices are Int64Index([0, 52, 79, 93, 96, 125, 129], dtype='int64')
0     2015-07-29 06:31:34
52    2015-07-29 07:18:32
79    2015-07-29 17:37:42
93    2015-07-29 18:01:39
96    2015-07-29 18:10:53
125   2015-07-29 18:40:37
129   2015-07-29 18:45:39
Name: formatted_time, dtype: datetime64[ns]
Considering trip from 2015-07-29 06:31:34 to 2015-07-29 07:18:32 because start = 0 and end = 52
Considering trip from 2015-07-29 07:18:32 to 2015-07-29 17:37:42 because start = 52 and end = 79
Considering trip from 2015-07-29 17:37:42 to 2015-07-29 18:07:35 because start = 79 and end = 93
Considering trip from 2015-07-29 18:07:35 to 2015-07-29 18:21:45 because start = 93 and end = 96
Ignoring trip from 2015-07-29 18:07:35 to 2015-07-29 18:21:45 because start = 93 and end = 96
Considering trip from 2015-07-29 18:21:45 to 2015-07-29 19:00:25 because start = 96 and end = 125
Considering trip from 2015-07-29 19:00:25 to 2015-07-29 19:05:06 because start = 125 and end = 129
Out[135]:

In [136]:
tom_dist_filter_map_list_5 = get_filter_compare(lq.get_uuid_list()[2], "distance", get_aug_dt(5), get_aug_dt(6))
ipy.inline_maps(tom_dist_filter_map_list_5, len(tom_dist_filter_map_list_5), 2)


final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1438761600000.0}}, {'data.mTime': {'$lt': 1438848000000.0}}], 'user_id': UUID('b0d937d0-70ef-305e-9563-440369012b39'), 'metadata.filter': 'distance'} 
original split indices are Int64Index([0, 31, 33, 39, 47], dtype='int64')
0    2015-08-05 18:35:36
31   2015-08-05 19:14:17
33   2015-08-05 19:22:33
39   2015-08-05 19:51:08
47   2015-08-05 20:03:10
Name: formatted_time, dtype: datetime64[ns]
Considering trip from 2015-08-05 18:35:36 to 2015-08-05 19:14:17 because start = 0 and end = 31
Considering trip from 2015-08-05 19:14:17 to 2015-08-05 19:22:33 because start = 31 and end = 33
Ignoring trip from 2015-08-05 19:14:17 to 2015-08-05 19:22:33 because start = 31 and end = 33
Considering trip from 2015-08-05 19:22:33 to 2015-08-05 19:51:08 because start = 33 and end = 39
Considering trip from 2015-08-05 19:51:08 to 2015-08-05 20:03:10 because start = 39 and end = 47
final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1438761600000.0}}, {'data.mTime': {'$lt': 1438848000000.0}}], 'user_id': UUID('b0d937d0-70ef-305e-9563-440369012b39'), 'metadata.filter': 'distance'} 
filtering points Int64Index([3, 4, 6, 8, 9, 10, 11, 39], dtype='int64')
filtered list size went from (48, 29) to (40, 29)
speedThreshold = 30
Found 1 potential outliers, list = [3]
Only one candidate, cluster centers are [array([3])]
Considering candidate cluster center [3]
lowRange = max(-2, 0) = 0 and highRange = max(8, 40) = 8
Area size = 9, index = Int64Index([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype='int64') with size 9
Deleted 0 points through ransac filtering
Retain mask is of size 9
Accuracy filtered df shape is (40, 29), ransac_mask size = 40
filtered split indices are Int64Index([0, 24, 26, 32, 39], dtype='int64')
0    2015-08-05 18:35:36
24   2015-08-05 18:57:40
26   2015-08-05 18:59:12
32   2015-08-05 19:17:29
39   2015-08-05 19:51:08
Name: formatted_time, dtype: datetime64[ns]
Considering trip from 2015-08-05 18:35:36 to 2015-08-05 19:14:17 because start = 0 and end = 24
Considering trip from 2015-08-05 19:14:17 to 2015-08-05 19:22:33 because start = 24 and end = 26
Ignoring trip from 2015-08-05 19:14:17 to 2015-08-05 19:22:33 because start = 24 and end = 26
Considering trip from 2015-08-05 19:22:33 to 2015-08-05 19:51:39 because start = 26 and end = 32
Considering trip from 2015-08-05 19:51:39 to 2015-08-05 20:03:10 because start = 32 and end = 39
Out[136]:

In [137]:
my_time_filter_map_list_6 = get_filter_compare(lq.get_uuid_list()[0], "time", get_aug_dt(6), get_aug_dt(7))
ipy.inline_maps(my_time_filter_map_list_6, len(my_time_filter_map_list_6), 2)


final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1438848000000.0}}, {'data.mTime': {'$lt': 1438934400000.0}}], 'user_id': UUID('0763de67-f61e-3f5d-90e7-518e69793954'), 'metadata.filter': 'time'} 
original split indices are Int64Index([0, 267, 268, 316], dtype='int64')
0     2015-08-06 08:31:14
267   2015-08-06 13:15:15
268   2015-08-06 15:45:06
316   2015-08-06 16:07:41
Name: formatted_time, dtype: datetime64[ns]
Considering trip from 2015-08-06 08:31:14 to 2015-08-06 13:15:15 because start = 0 and end = 267
Considering trip from 2015-08-06 13:15:15 to 2015-08-06 15:45:06 because start = 267 and end = 268
Ignoring trip from 2015-08-06 13:15:15 to 2015-08-06 15:45:06 because start = 267 and end = 268
Considering trip from 2015-08-06 15:45:06 to 2015-08-06 16:07:41 because start = 268 and end = 316
final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1438848000000.0}}, {'data.mTime': {'$lt': 1438934400000.0}}], 'user_id': UUID('0763de67-f61e-3f5d-90e7-518e69793954'), 'metadata.filter': 'time'} 
filtering points Int64Index([3, 16, 17, 18, 19, 20, 22, 23, 26, 27, 29, 31, 32, 33, 34, 35, 38, 39, 40, 41, 44, 45, 53, 66, 67, 68, 69, 70, 73, 74, 75, 76, 77, 80, 81, 82, 83, 85, 87, 93, 105, 106, 115, 116, 117, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 144, 145, 146, 147, 148, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 173, 174, 175, 176, 177, 178, 179, 180, ...], dtype='int64')
filtered list size went from (317, 29) to (166, 29)
speedThreshold = 34
Found 2 potential outliers, list = [18 81]
Only one candidate, cluster centers are [array([18, 81])]
Considering candidate cluster center [18 81]
lowRange = max(13, 0) = 13 and highRange = max(23, 166) = 23
Area size = 11, index = Int64Index([13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], dtype='int64') with size 11
Deleted 0 points through ransac filtering
Retain mask is of size 11
Accuracy filtered df shape is (166, 29), ransac_mask size = 166
filtered split indices are Int64Index([0, 74, 80, 81, 85, 137, 138, 165], dtype='int64')
0     2015-08-06 08:31:14
74    2015-08-06 09:09:51
80    2015-08-06 09:12:51
81    2015-08-06 09:13:21
85    2015-08-06 09:15:22
137   2015-08-06 09:44:23
138   2015-08-06 09:45:00
165   2015-08-06 09:58:55
Name: formatted_time, dtype: datetime64[ns]
Considering trip from 2015-08-06 08:31:14 to 2015-08-06 09:45:24 because start = 0 and end = 74
Considering trip from 2015-08-06 09:45:24 to 2015-08-06 10:02:24 because start = 74 and end = 80
Considering trip from 2015-08-06 10:02:24 to 2015-08-06 10:09:25 because start = 80 and end = 81
Ignoring trip from 2015-08-06 10:02:24 to 2015-08-06 10:09:25 because start = 80 and end = 81
Considering trip from 2015-08-06 10:09:25 to 2015-08-06 10:18:58 because start = 81 and end = 85
Considering trip from 2015-08-06 10:18:58 to 2015-08-06 13:15:15 because start = 85 and end = 137
Considering trip from 2015-08-06 13:15:15 to 2015-08-06 15:45:06 because start = 137 and end = 138
Ignoring trip from 2015-08-06 13:15:15 to 2015-08-06 15:45:06 because start = 137 and end = 138
Considering trip from 2015-08-06 15:45:06 to 2015-08-06 16:03:01 because start = 138 and end = 165
Out[137]:

In [138]:
unfiltered_maps = get_map_list(lq.get_uuid_list()[0], "time", get_aug_dt(6), get_aug_dt(7))
print len(unfiltered_maps)
filtered_maps = get_filtered_map_list(lq.get_uuid_list()[0], "time", get_aug_dt(6), get_aug_dt(7))
print len(filtered_maps)


final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1438848000000.0}}, {'data.mTime': {'$lt': 1438934400000.0}}], 'user_id': UUID('0763de67-f61e-3f5d-90e7-518e69793954'), 'metadata.filter': 'time'} 
original split indices are Int64Index([0, 267, 268, 316], dtype='int64')
0     2015-08-06 08:31:14
267   2015-08-06 13:15:15
268   2015-08-06 15:45:06
316   2015-08-06 16:07:41
Name: formatted_time, dtype: datetime64[ns]
Considering trip from 2015-08-06 08:31:14 to 2015-08-06 13:15:15 because start = 0 and end = 267
Considering trip from 2015-08-06 13:15:15 to 2015-08-06 15:45:06 because start = 267 and end = 268
Ignoring trip from 2015-08-06 13:15:15 to 2015-08-06 15:45:06 because start = 267 and end = 268
Considering trip from 2015-08-06 15:45:06 to 2015-08-06 16:07:41 because start = 268 and end = 316
2
final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1438848000000.0}}, {'data.mTime': {'$lt': 1438934400000.0}}], 'user_id': UUID('0763de67-f61e-3f5d-90e7-518e69793954'), 'metadata.filter': 'time'} 
filtering points Int64Index([3, 16, 17, 18, 19, 20, 22, 23, 26, 27, 29, 31, 32, 33, 34, 35, 38, 39, 40, 41, 44, 45, 53, 66, 67, 68, 69, 70, 73, 74, 75, 76, 77, 80, 81, 82, 83, 85, 87, 93, 105, 106, 115, 116, 117, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 144, 145, 146, 147, 148, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 173, 174, 175, 176, 177, 178, 179, 180, ...], dtype='int64')
filtered list size went from (317, 29) to (166, 29)
speedThreshold = 34
Found 2 potential outliers, list = [18 81]
Only one candidate, cluster centers are [array([18, 81])]
Considering candidate cluster center [18 81]
lowRange = max(13, 0) = 13 and highRange = max(23, 166) = 23
Area size = 11, index = Int64Index([13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23], dtype='int64') with size 11
Deleted 0 points through ransac filtering
Retain mask is of size 11
Accuracy filtered df shape is (166, 29), ransac_mask size = 166
filtered split indices are Int64Index([0, 74, 80, 81, 85, 137, 138, 165], dtype='int64')
0     2015-08-06 08:31:14
74    2015-08-06 09:09:51
80    2015-08-06 09:12:51
81    2015-08-06 09:13:21
85    2015-08-06 09:15:22
137   2015-08-06 09:44:23
138   2015-08-06 09:45:00
165   2015-08-06 09:58:55
Name: formatted_time, dtype: datetime64[ns]
Considering trip from 2015-08-06 08:31:14 to 2015-08-06 09:45:24 because start = 0 and end = 74
Considering trip from 2015-08-06 09:45:24 to 2015-08-06 10:02:24 because start = 74 and end = 80
Considering trip from 2015-08-06 10:02:24 to 2015-08-06 10:09:25 because start = 80 and end = 81
Ignoring trip from 2015-08-06 10:02:24 to 2015-08-06 10:09:25 because start = 80 and end = 81
Considering trip from 2015-08-06 10:09:25 to 2015-08-06 10:18:58 because start = 81 and end = 85
Considering trip from 2015-08-06 10:18:58 to 2015-08-06 13:15:15 because start = 85 and end = 137
Considering trip from 2015-08-06 13:15:15 to 2015-08-06 15:45:06 because start = 137 and end = 138
Ignoring trip from 2015-08-06 13:15:15 to 2015-08-06 15:45:06 because start = 137 and end = 138
Considering trip from 2015-08-06 15:45:06 to 2015-08-06 16:03:01 because start = 138 and end = 165
5

In [139]:
tom_dist_filter_map_list_23 = get_filter_compare(lq.get_uuid_list()[2], "distance", get_jul_dt(23), get_jul_dt(24))
ipy.inline_maps(tom_dist_filter_map_list_23, len(tom_dist_filter_map_list_23), 2)


final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1437638400000.0}}, {'data.mTime': {'$lt': 1437724800000.0}}], 'user_id': UUID('b0d937d0-70ef-305e-9563-440369012b39'), 'metadata.filter': 'distance'} 
original split indices are Int64Index([0, 2, 4, 64, 66, 77, 80, 110, 127, 238, 241, 245, 248], dtype='int64')
0     2015-07-23 01:14:32
2     2015-07-23 08:09:59
4     2015-07-23 08:16:09
64    2015-07-23 09:19:23
66    2015-07-23 09:28:08
77    2015-07-23 09:45:57
80    2015-07-23 09:56:17
110   2015-07-23 14:36:50
127   2015-07-23 15:04:13
238   2015-07-23 16:55:58
241   2015-07-23 17:15:31
245   2015-07-23 19:51:32
248   2015-07-23 19:59:22
Name: formatted_time, dtype: datetime64[ns]
Considering trip from 2015-07-23 01:14:32 to 2015-07-23 08:09:59 because start = 0 and end = 2
Ignoring trip from 2015-07-23 01:14:32 to 2015-07-23 08:09:59 because start = 0 and end = 2
Considering trip from 2015-07-23 08:09:59 to 2015-07-23 08:16:09 because start = 2 and end = 4
Ignoring trip from 2015-07-23 08:09:59 to 2015-07-23 08:16:09 because start = 2 and end = 4
Considering trip from 2015-07-23 08:16:09 to 2015-07-23 09:19:23 because start = 4 and end = 64
Considering trip from 2015-07-23 09:19:23 to 2015-07-23 09:28:08 because start = 64 and end = 66
Ignoring trip from 2015-07-23 09:19:23 to 2015-07-23 09:28:08 because start = 64 and end = 66
Considering trip from 2015-07-23 09:28:08 to 2015-07-23 09:45:57 because start = 66 and end = 77
Considering trip from 2015-07-23 09:45:57 to 2015-07-23 09:56:17 because start = 77 and end = 80
Ignoring trip from 2015-07-23 09:45:57 to 2015-07-23 09:56:17 because start = 77 and end = 80
Considering trip from 2015-07-23 09:56:17 to 2015-07-23 14:36:50 because start = 80 and end = 110
Considering trip from 2015-07-23 14:36:50 to 2015-07-23 15:04:13 because start = 110 and end = 127
Considering trip from 2015-07-23 15:04:13 to 2015-07-23 16:55:58 because start = 127 and end = 238
Considering trip from 2015-07-23 16:55:58 to 2015-07-23 17:15:31 because start = 238 and end = 241
Ignoring trip from 2015-07-23 16:55:58 to 2015-07-23 17:15:31 because start = 238 and end = 241
Considering trip from 2015-07-23 17:15:31 to 2015-07-23 19:51:32 because start = 241 and end = 245
Considering trip from 2015-07-23 19:51:32 to 2015-07-23 19:59:22 because start = 245 and end = 248
Ignoring trip from 2015-07-23 19:51:32 to 2015-07-23 19:59:22 because start = 245 and end = 248
final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1437638400000.0}}, {'data.mTime': {'$lt': 1437724800000.0}}], 'user_id': UUID('b0d937d0-70ef-305e-9563-440369012b39'), 'metadata.filter': 'distance'} 
filtering points Int64Index([0, 2, 61, 65, 66, 71, 77, 79, 80, 87, 88, 99, 120, 123, 125, 127, 129, 132, 133, 134, 135, 136, 152, 156, 158, 163, 165, 166, 168, 171, 173, 177, 179, 190, 192, 196, 208, 210, 213, 219, 224, 228, 230, 232], dtype='int64')
filtered list size went from (249, 28) to (205, 28)
speedThreshold = 143
Found 3 potential outliers, list = [68 69 70]
Only one candidate, cluster centers are [array([68, 69, 70])]
Considering candidate cluster center [68 69 70]
lowRange = max(63, 0) = 63 and highRange = max(73, 205) = 73
Area size = 11, index = Int64Index([63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73], dtype='int64') with size 11
Deleted 3 points through ransac filtering
Retain mask is of size 11
Accuracy filtered df shape is (205, 28), ransac_mask size = 205
filtered split indices are Int64Index([0, 1, 2, 61, 62, 72, 98, 112, 115, 138, 194, 197, 201, 204], dtype='int64')
0     2015-07-23 01:14:32
1     2015-07-23 01:14:37
2     2015-07-23 08:09:59
61    2015-07-23 09:06:22
62    2015-07-23 09:06:57
72    2015-07-23 09:33:26
98    2015-07-23 10:08:58
112   2015-07-23 14:38:45
115   2015-07-23 14:42:58
138   2015-07-23 15:15:26
194   2015-07-23 16:10:41
197   2015-07-23 16:12:54
201   2015-07-23 16:15:55
204   2015-07-23 16:18:00
Name: formatted_time, dtype: datetime64[ns]
Considering trip from 2015-07-23 01:14:37 to 2015-07-23 08:10:04 because start = 0 and end = 1
Ignoring trip from 2015-07-23 01:14:37 to 2015-07-23 08:10:04 because start = 0 and end = 1
Considering trip from 2015-07-23 08:10:04 to 2015-07-23 08:16:09 because start = 1 and end = 2
Ignoring trip from 2015-07-23 08:10:04 to 2015-07-23 08:16:09 because start = 1 and end = 2
Considering trip from 2015-07-23 08:16:09 to 2015-07-23 09:19:23 because start = 2 and end = 61
Considering trip from 2015-07-23 09:19:23 to 2015-07-23 09:29:14 because start = 61 and end = 62
Ignoring trip from 2015-07-23 09:19:23 to 2015-07-23 09:29:14 because start = 61 and end = 62
Considering trip from 2015-07-23 09:29:14 to 2015-07-23 09:56:53 because start = 62 and end = 72
Considering trip from 2015-07-23 09:56:53 to 2015-07-23 14:36:50 because start = 72 and end = 98
Considering trip from 2015-07-23 14:36:50 to 2015-07-23 15:05:21 because start = 98 and end = 112
Considering trip from 2015-07-23 15:05:21 to 2015-07-23 15:13:24 because start = 112 and end = 115
Ignoring trip from 2015-07-23 15:05:21 to 2015-07-23 15:13:24 because start = 112 and end = 115
Considering trip from 2015-07-23 15:13:24 to 2015-07-23 15:45:30 because start = 115 and end = 138
Considering trip from 2015-07-23 15:45:30 to 2015-07-23 16:55:58 because start = 138 and end = 194
Considering trip from 2015-07-23 16:55:58 to 2015-07-23 17:15:31 because start = 194 and end = 197
Ignoring trip from 2015-07-23 16:55:58 to 2015-07-23 17:15:31 because start = 194 and end = 197
Considering trip from 2015-07-23 17:15:31 to 2015-07-23 19:51:32 because start = 197 and end = 201
Considering trip from 2015-07-23 19:51:32 to 2015-07-23 19:59:22 because start = 201 and end = 204
Ignoring trip from 2015-07-23 19:51:32 to 2015-07-23 19:59:22 because start = 201 and end = 204
Out[139]:

In [139]:


In [139]: